diff --git a/.gitignore b/.gitignore index e642ec5351..cceb72f628 100644 --- a/.gitignore +++ b/.gitignore @@ -136,8 +136,21 @@ kokoro-multi-lang-v1_0 sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 cmake-build-debug README-DEV.txt -*.rknn -*.jit + +# WASM combined build artifacts +wasm/combined/*.wasm +wasm/combined/sherpa-onnx-wasm-combined.js +build-wasm-combined/ +# Don't ignore the build script +!build-wasm-combined.sh + ##clion .idea +scripts/dotnet/examples/obj/Debug/net8.0/Common.AssemblyInfo.cs +scripts/dotnet/examples/obj/Debug/net8.0/Common.GeneratedMSBuildEditorConfig.editorconfig +scripts/dotnet/examples/obj/Debug/net8.0/Common.AssemblyInfoInputs.cache +wasm/asr/sherpa-onnx-wasm-main-asr.data +wasm/asr/sherpa-onnx-wasm-main-asr.js +wasm/asr/sherpa-onnx-wasm-main-asr.wasm + sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 diff --git a/build-wasm-combined.sh b/build-wasm-combined.sh new file mode 100755 index 0000000000..4f23f0cd68 --- /dev/null +++ b/build-wasm-combined.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# +# Copyright (c) 2024 Xiaomi Corporation + +# Exit on error and print commands +set -ex + +echo "=== Starting build process for sherpa-onnx WASM combined ===" + +# Set environment flag to indicate we're using this script +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=1 + +# Create build directory +mkdir -p build-wasm-combined +cd build-wasm-combined + +echo "=== Running CMake configuration ===" +# Configure with CMake +emcmake cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DSHERPA_ONNX_ENABLE_WASM=ON \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=OFF \ + -DSHERPA_ONNX_ENABLE_C_API=ON \ + -DSHERPA_ONNX_ENABLE_TEST=OFF \ + -DSHERPA_ONNX_ENABLE_WASM_COMBINED=ON \ + -DSHERPA_ONNX_INSTALL_TO_REPO=ON \ + .. + +echo "=== Building the target ===" +# Build the target with full path to the target +emmake make -j $(nproc) sherpa-onnx-wasm-combined + +echo "=== Installing the files ===" +# Install the files +emmake make install/strip + +if [ $? -eq 0 ]; then + echo "=== Build completed successfully! ===" + echo "Files have been installed to bin/wasm/combined and copied to wasm/combined/" +else + echo "=== Build failed! 
Check the error messages above ===" + exit 1 +fi \ No newline at end of file diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 0f18d3130b..2ea46af935 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -29,3 +29,7 @@ endif() if(SHERPA_ONNX_ENABLE_WASM_NODEJS) add_subdirectory(nodejs) endif() + +if(SHERPA_ONNX_ENABLE_WASM_COMBINED) + add_subdirectory(combined) +endif() diff --git a/wasm/combined/.gitignore b/wasm/combined/.gitignore new file mode 100644 index 0000000000..d055e657c8 --- /dev/null +++ b/wasm/combined/.gitignore @@ -0,0 +1,7 @@ +# Generated WASM files +*.wasm +sherpa-onnx-wasm-combined.js +sherpa-onnx-wasm-combined.data +# Local model files +*.onnx +*tokens.txt \ No newline at end of file diff --git a/wasm/combined/CMakeLists.txt b/wasm/combined/CMakeLists.txt new file mode 100644 index 0000000000..4cb9b65e99 --- /dev/null +++ b/wasm/combined/CMakeLists.txt @@ -0,0 +1,220 @@ +if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) + message(FATAL_ERROR "Please use ./build-wasm-combined.sh to build for wasm combined module") +endif() + +# Check for asset directories +if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/asr") + message(WARNING "ASR assets directory not found at ${CMAKE_CURRENT_SOURCE_DIR}/assets/asr") +endif() + +if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/vad") + message(WARNING "VAD assets directory not found at ${CMAKE_CURRENT_SOURCE_DIR}/assets/vad") +endif() + +if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/tts") + message(WARNING "TTS assets directory not found at ${CMAKE_CURRENT_SOURCE_DIR}/assets/tts") +endif() + +if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/kws") + message(WARNING "KWS assets directory not found at ${CMAKE_CURRENT_SOURCE_DIR}/assets/kws") +endif() + +# Collect all exported functions from all modules +set(exported_functions + # Core utilities + CopyHeap + MyPrintOnlineASR + MyPrintVAD + MyPrintTTS + MyPrintSpeakerDiarization + MyPrintSpeechEnhancement + MyPrintKeywordSpotting + SherpaOnnxFileExists + + # Online ASR + SherpaOnnxCreateOnlineRecognizer + SherpaOnnxCreateOnlineStream + SherpaOnnxDecodeOnlineStream + SherpaOnnxDestroyOfflineStreamResultJson + SherpaOnnxDestroyOnlineRecognizer + SherpaOnnxDestroyOnlineRecognizerResult + SherpaOnnxDestroyOnlineStream + SherpaOnnxDestroyOnlineStreamResultJson + SherpaOnnxGetOfflineStreamResultAsJson + SherpaOnnxGetOnlineStreamResult + SherpaOnnxGetOnlineStreamResultAsJson + SherpaOnnxIsOnlineStreamReady + SherpaOnnxOnlineStreamAcceptWaveform + SherpaOnnxOnlineStreamInputFinished + SherpaOnnxOnlineStreamIsEndpoint + SherpaOnnxOnlineStreamReset + + # Offline ASR + SherpaOnnxCreateOfflineRecognizer + SherpaOnnxCreateOfflineStream + SherpaOnnxDecodeOfflineStream + SherpaOnnxDecodeMultipleOfflineStreams + SherpaOnnxDestroyOfflineRecognizer + SherpaOnnxDestroyOfflineRecognizerResult + SherpaOnnxDestroyOfflineStream + SherpaOnnxAcceptWaveformOffline + SherpaOnnxGetOfflineStreamResult + + # TTS + SherpaOnnxCreateOfflineTts + SherpaOnnxDestroyOfflineTts + SherpaOnnxDestroyOfflineTtsGeneratedAudio + SherpaOnnxOfflineTtsGenerate + SherpaOnnxOfflineTtsGenerateWithCallback + SherpaOnnxOfflineTtsSampleRate + SherpaOnnxOfflineTtsNumSpeakers + SherpaOnnxWriteWave + + # VAD + SherpaOnnxCreateCircularBuffer + SherpaOnnxDestroyCircularBuffer + SherpaOnnxCircularBufferPush + SherpaOnnxCircularBufferGet + SherpaOnnxCircularBufferFree + SherpaOnnxCircularBufferPop + SherpaOnnxCircularBufferSize + SherpaOnnxCircularBufferHead + SherpaOnnxCircularBufferReset + 
SherpaOnnxCreateVoiceActivityDetector + SherpaOnnxDestroyVoiceActivityDetector + SherpaOnnxVoiceActivityDetectorAcceptWaveform + SherpaOnnxVoiceActivityDetectorEmpty + SherpaOnnxVoiceActivityDetectorDetected + SherpaOnnxVoiceActivityDetectorPop + SherpaOnnxVoiceActivityDetectorClear + SherpaOnnxVoiceActivityDetectorFront + SherpaOnnxDestroySpeechSegment + SherpaOnnxVoiceActivityDetectorReset + SherpaOnnxVoiceActivityDetectorFlush + + # KWS + SherpaOnnxCreateKeywordSpotter + SherpaOnnxDestroyKeywordSpotter + SherpaOnnxCreateKeywordStream + SherpaOnnxIsKeywordStreamReady + SherpaOnnxDecodeKeywordStream + SherpaOnnxResetKeywordStream + SherpaOnnxGetKeywordResult + SherpaOnnxDestroyKeywordResult +) + +set(mangled_exported_functions) +foreach(x IN LISTS exported_functions) + list(APPEND mangled_exported_functions "_${x}") +endforeach() +list(JOIN mangled_exported_functions "," all_exported_functions) + +include_directories(${CMAKE_SOURCE_DIR}) +set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1") +string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB +string(APPEND MY_FLAGS " -sASYNCIFY=1 -sFETCH=1 ") # For async loading +string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_malloc,_free,${all_exported_functions}] ") +# No preloaded assets - all models will be loaded dynamically +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','FS'] ") + +# Load precompiled assets using structured paths +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/asr") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/asr@/sherpa_assets/asr ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/vad") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/vad@/sherpa_assets/vad ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/tts") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/tts@/sherpa_assets/tts ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/kws") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/kws@/sherpa_assets/kws ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/speakers") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/speakers@/sherpa_assets/speakers ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/enhancement") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/enhancement@/sherpa_assets/enhancement ") +endif() + +message(STATUS "MY_FLAGS: ${MY_FLAGS}") + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") +set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}") + +add_executable(sherpa-onnx-wasm-combined sherpa-onnx-wasm-combined.cc) +target_link_libraries(sherpa-onnx-wasm-combined sherpa-onnx-c-api) +install(TARGETS sherpa-onnx-wasm-combined DESTINATION bin/wasm/combined) + +install( + FILES + "$/sherpa-onnx-wasm-combined.js" + "$/sherpa-onnx-wasm-combined.wasm" + "$/sherpa-onnx-wasm-combined.data" + "sherpa-onnx-core.js" + "sherpa-onnx-asr.js" + "sherpa-onnx-vad.js" + "sherpa-onnx-tts.js" + "sherpa-onnx-kws.js" + "sherpa-onnx-speaker.js" + "sherpa-onnx-enhancement.js" + "sherpa-onnx-combined.js" + DESTINATION + bin/wasm/combined +) + +# Add option to install to original repo +option(SHERPA_ONNX_INSTALL_TO_REPO "Install compiled WASM files to original repo directory" OFF) 
+set(SHERPA_ONNX_REPO_PATH "${CMAKE_SOURCE_DIR}/wasm/combined" CACHE PATH "Path to original repo wasm directory") + +if(SHERPA_ONNX_INSTALL_TO_REPO) + # Add a custom target that will run after the installation + add_custom_target(install_to_repo ALL + COMMAND ${CMAKE_COMMAND} -E echo "Installing to original repo at ${SHERPA_ONNX_REPO_PATH}..." + COMMAND ${CMAKE_COMMAND} -E make_directory ${SHERPA_ONNX_REPO_PATH} + + # Copy the JS file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_BINARY_DIR}/bin + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="sherpa-onnx-wasm-combined.js" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + # Copy the WASM file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_BINARY_DIR}/bin + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="sherpa-onnx-wasm-combined.wasm" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + # Copy the DATA file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_BINARY_DIR}/bin + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="sherpa-onnx-wasm-combined.data" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + # Copy the index.html file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_CURRENT_SOURCE_DIR} + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="index.html" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + # Copy the JS library file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_CURRENT_SOURCE_DIR} + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="sherpa-onnx-combined.js" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + DEPENDS sherpa-onnx-wasm-combined + COMMENT "Checking and installing WASM files to original repo" + ) +endif() \ No newline at end of file diff --git a/wasm/combined/ISSUE.md b/wasm/combined/ISSUE.md new file mode 100644 index 0000000000..cdd8bec6ae --- /dev/null +++ b/wasm/combined/ISSUE.md @@ -0,0 +1,42 @@ +# Sherpa-ONNX WASM Combined Module Issue: Inconsistent Shared Module Context & HEAPF32 Access Failure + +## Problem Description + +The core issue is a fundamental limitation in the current Sherpa-ONNX WASM combined module architecture: **it fails to establish a reliably shared and synchronized WebAssembly (WASM) runtime context across the multiple, sequentially loaded JavaScript component files** (`sherpa-onnx-combined-core.js`, `sherpa-onnx-combined-asr.js`, etc.). Specifically, essential JavaScript views onto the WASM memory, like `HEAPF32`, are not consistently accessible across these script boundaries. + +### Background: WASM Memory and HEAP Views + +- **WASM Linear Memory**: WebAssembly modules operate on a contiguous block of memory. +- **Emscripten HEAP Views**: To allow JavaScript to interact with this memory, Emscripten (the compiler used) creates typed array views (e.g., `Float32Array`, `Int8Array`) pointing directly into this memory block. These views are assigned to the global `Module` object as properties like `Module.HEAPF32`, `Module.HEAP8`, `Module.HEAPU8`, etc. +- **Initialization**: These `HEAP*` views are crucial for JS-WASM communication. They are normally initialized by the main Emscripten glue code (`sherpa-onnx-wasm-combined.js` in this case) *after* the WASM memory buffer is allocated but *before* or *during* the `Module.onRuntimeInitialized` callback, signifying the runtime is ready. + +### Detailed Explanation of the Failure + +1. **Context/Scope Separation & HEAP Inaccessibility**: Despite all scripts referencing the global `window.Module`, they appear to operate within distinct execution contexts. 
Crucially, the standard `HEAP*` memory views (especially `HEAPF32`, essential for ASR audio data transfer) that *should* be initialized on `window.Module` by the main glue code are **not accessible or visible** within the context of the subsequently loaded component scripts (e.g., `sherpa-onnx-combined-core.js`). The repeated log messages `No suitable memory buffer found` and `HEAPF32 exists: false` from within `sherpa-onnx-combined-core.js` are direct evidence of this failure. + +2. **Sequential Loading Barrier**: The architecture loads functional components (ASR, VAD, etc.) as separate JS files *after* the main WASM module and its memory are expected to initialize. This sequential loading creates context boundaries that prevent the component scripts from accessing the already initialized `HEAP*` views on the `Module` object. + +3. **Initialization Callbacks Ineffective Across Contexts**: Callbacks like `onRuntimeInitialized` might fire in the main glue code's context, but this readiness state (including the availability of initialized `HEAP*` views) does not reliably propagate to the separate contexts of the component scripts. + +4. **Runtime Errors**: Consequently, operations requiring direct JS interaction with WASM memory via these views fail. For example, `OnlineStream.acceptWaveform` in ASR needs to write to `HEAPF32`. Since `HEAPF32` is inaccessible in the `asr.js` or `core.js` context, this fails, leading to downstream errors like `TypeError: asr.createStream is not a function` (as the recognizer likely failed during its own initialization which might require memory access). + +5. **Selective Functionality Failure (Evidence)**: Functionalities like TTS (`tts.html`) appear less affected. This suggests their JS-WASM interaction pattern doesn't critically rely on the *JavaScript context* having direct write access to `HEAPF32` in the same way streaming ASR does, further supporting that the issue is specific to the accessibility of these memory views across script contexts. + +### Impact + +- **Unreliable Functionality**: Core features requiring JS access to WASM memory views (like streaming ASR via `HEAPF32`) fail reliably. +- **Debugging Dead End**: Standard synchronization techniques are ineffective because the fundamental issue is the inaccessibility of necessary `HEAP*` views due to context separation. + +### Architectural Root Cause + +The multi-file JavaScript approach, combined with Emscripten's standard output, fails to guarantee that the essential `HEAP*` memory views initialized on the `Module` object are accessible from the separate JavaScript files loaded later. Each script effectively gets a view of the `Module` object that might lack these critical, dynamically initialized properties. + +### Potential Solutions + +1. **Unified Script (Likely Viable but with Drawbacks)**: Combine *all* JavaScript glue code (core, ASR, VAD, TTS, etc.) and the main Emscripten `Module` interaction into a **single, large JavaScript file**. This forces all code into the same execution context, ensuring consistent access to the initialized `Module` object and its `HEAP*` views. **Drawback**: Creates a potentially very large initial JS file, impacting load performance. + +2. **WASM Module Re-architecture (Complex)**: Fundamentally change how the C++ code is compiled, perhaps using Emscripten features explicitly designed for better JS module interoperability (e.g., `MODULARIZE=1`, ES6 modules output) that might handle state sharing differently. 
This likely requires significant changes to the build process and C++/JS interface. + +3. ~~Delayed Functionality Binding~~ (Proven Ineffective): Delaying execution doesn't solve the problem that the necessary `HEAP*` views are fundamentally inaccessible from within the component script contexts. + +This issue highlights a significant architectural challenge. The **Unified Script** approach appears the most practical path forward within the existing build system, despite performance implications. diff --git a/wasm/combined/README.md b/wasm/combined/README.md new file mode 100644 index 0000000000..87006f52fd --- /dev/null +++ b/wasm/combined/README.md @@ -0,0 +1,60 @@ +# Sherpa-ONNX WASM Combined Module + +This directory contains the WebAssembly (WASM) combined module for Sherpa-ONNX, which includes support for: +- Automatic Speech Recognition (ASR) +- Voice Activity Detection (VAD) +- Text-to-Speech (TTS) +- Keyword Spotting (KWS) +- Speaker Diarization +- Speech Enhancement + +## File Structure + +When built, the following files are generated: +- `sherpa-onnx-wasm-combined.js` - The main JavaScript glue code +- `sherpa-onnx-wasm-combined.wasm` - The WebAssembly binary +- `sherpa-onnx-wasm-combined.data` - The preloaded assets (models) +- JS library files: + - `sherpa-onnx-combined-core.js` - Core functionality + - `sherpa-onnx-combined-asr.js` - ASR functionality + - `sherpa-onnx-combined-vad.js` - VAD functionality + - `sherpa-onnx-combined-tts.js` - TTS functionality + - `sherpa-onnx-combined-kws.js` - Keyword Spotting functionality + - `sherpa-onnx-combined-speaker.js` - Speaker Diarization functionality + - `sherpa-onnx-combined-enhancement.js` - Speech Enhancement functionality + - `sherpa-onnx-combined.js` - Combined functionality wrapper + +## Building + +To build the WASM module: + +```bash +cd /path/to/sherpa-onnx +./build-wasm-combined.sh +``` + +This script will: +1. Create a `build-wasm-combined` directory +2. Configure CMake with the necessary options +3. Build the WASM module +4. Install the files to `bin/wasm/combined` +5. Copy the files to the original repo at `wasm/combined` + +## Important Notes + +1. **Large Asset Bundle**: The `.data` file can be very large (300MB+) as it contains all preloaded models. For production, consider using dynamic loading of models instead. + +2. **File Locations**: All files must be in the same directory for the WASM module to work correctly. The `.data` file MUST be in the same directory as the `.js` and `.wasm` files. + +3. **Local Testing**: To test locally, run a web server from the `wasm/combined` directory: + +```bash +cd /path/to/sherpa-onnx/wasm/combined +python -m http.server 8000 +``` + +Then open `http://localhost:8000` in your browser. + +## License + +Same as Sherpa-ONNX. 
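A practical corollary of the "File Locations" note above: the demo pages under `demos/` are served one level below the build outputs, so they configure `Module.locateFile` before the glue code loads (this mirrors the hook installed in `demos/common.js`; it is a minimal sketch, not additional required API):

```js
// Must be defined before sherpa-onnx-wasm-combined.js is loaded.
var Module = Module || {};
Module.locateFile = function (path, prefix) {
  // The .wasm and .data files live one directory above the demo pages.
  if (path.endsWith('.wasm') || path.endsWith('.data')) {
    return '../' + path;
  }
  return prefix + path;  // default resolution for everything else
};
```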
\ No newline at end of file diff --git a/wasm/combined/assets/.gitignore b/wasm/combined/assets/.gitignore new file mode 100644 index 0000000000..a181d8d850 --- /dev/null +++ b/wasm/combined/assets/.gitignore @@ -0,0 +1,18 @@ +# Ignore all ONNX model files and subdirectories +*.onnx +asr/ +vad/ +tts/ +speakers/ +enhancement/ +kws/ + +# Ignore tokens files +*tokens.txt + +# Ignore temporary files +tmp/ + +# But keep the README.md and setup script +!README.md +!setup-assets.sh \ No newline at end of file diff --git a/wasm/combined/assets/.gitkeep b/wasm/combined/assets/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/wasm/combined/assets/setup-assets.sh b/wasm/combined/assets/setup-assets.sh new file mode 100644 index 0000000000..69ea49d11e --- /dev/null +++ b/wasm/combined/assets/setup-assets.sh @@ -0,0 +1,208 @@ +#!/bin/bash +# +# Script to download and setup all required models for Sherpa-ONNX Combined WASM demo +# + +set -e + +# Parse command line arguments +FORCE=false +for arg in "$@" +do + case $arg in + --force) + FORCE=true + shift + ;; + esac +done + +echo "===== Setting up assets for Sherpa-ONNX Combined WASM Demo =====" +echo "" + +if [ "$FORCE" = true ]; then + echo "Force mode enabled - will delete existing assets" +fi + +# Create subdirectories for each model type +mkdir -p asr vad tts speakers enhancement kws + +# Function to check if a directory exists and has content +check_dir_not_empty() { + local dir="$1" + if [ -d "$dir" ] && [ "$(ls -A "$dir" 2>/dev/null)" ]; then + return 0 # Directory exists and not empty + else + return 1 # Directory doesn't exist or is empty + fi +} + +# Create a tmp directory for downloads +mkdir -p tmp +cd tmp + +# Download ASR models +echo "1. Setting up ASR Models (Speech Recognition)..." +if [ "$FORCE" = true ] || ! check_dir_not_empty "../asr"; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../asr" ]; then + rm -rf "../asr" + mkdir -p "../asr" + fi + + # Download and extract ASR models + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + + # Rename for compatibility + mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ../asr/encoder.onnx + mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ../asr/decoder.onnx + mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ../asr/joiner.onnx + mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ../asr/tokens.txt + rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ + echo "ASR models downloaded and set up." +else + echo "ASR models already exist. Skipping download. Use --force to re-download." +fi + +# Download VAD model +echo "2. Setting up VAD Models (Voice Activity Detection)..." +if [ "$FORCE" = true ] || ! [ -f "../vad/silero_vad.onnx" ]; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../vad" ]; then + rm -rf "../vad" + mkdir -p "../vad" + fi + + wget -q -O ../vad/silero_vad.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx + echo "VAD model downloaded and set up." +else + echo "VAD model already exists. Skipping download. Use --force to re-download." +fi + +# Download TTS models +echo "3. 
Setting up TTS Models (Text-to-Speech)..." +if [ "$FORCE" = true ] || ! check_dir_not_empty "../tts"; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../tts" ]; then + rm -rf "../tts" + mkdir -p "../tts" + fi + + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 + tar xvf vits-piper-en_US-amy-low.tar.bz2 + rm vits-piper-en_US-amy-low.tar.bz2 + + # Move required files to TTS directory + mv vits-piper-en_US-amy-low/en_US-amy-low.onnx ../tts/model.onnx + mv vits-piper-en_US-amy-low/tokens.txt ../tts/tokens.txt + + # Handle espeak-ng-data directory safely + if [ -d "../tts/espeak-ng-data" ] && [ "$FORCE" = false ]; then + echo "espeak-ng-data directory already exists. Skipping..." + else + # Remove existing directory if it exists and we're forcing + if [ -d "../tts/espeak-ng-data" ]; then + rm -rf "../tts/espeak-ng-data" + fi + mv vits-piper-en_US-amy-low/espeak-ng-data ../tts/ + fi + + # Create zip archive of espeak-ng-data if needed + if [ ! -f "../tts/espeak-ng-data.zip" ] || [ "$FORCE" = true ]; then + echo "Creating zip archive of espeak-ng-data..." + cd ../tts + # Remove existing zip if force is enabled + if [ -f "espeak-ng-data.zip" ] && [ "$FORCE" = true ]; then + rm espeak-ng-data.zip + fi + zip -r espeak-ng-data.zip espeak-ng-data/ + cd ../tmp + else + echo "espeak-ng-data.zip already exists. Skipping..." + fi + + rm -rf vits-piper-en_US-amy-low/ + echo "TTS models downloaded and set up." +else + echo "TTS models already exist. Skipping download. Use --force to re-download." +fi + +# Download speaker diarization models +echo "4. Setting up Speaker Diarization Models..." +if [ "$FORCE" = true ] || ! check_dir_not_empty "../speakers"; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../speakers" ]; then + rm -rf "../speakers" + mkdir -p "../speakers" + fi + + # Download segmentation model + if [ "$FORCE" = true ] || ! [ -f "../speakers/segmentation.onnx" ]; then + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ../speakers/segmentation.onnx + rm -rf sherpa-onnx-pyannote-segmentation-3-0 + fi + + # Download embedding model + if [ "$FORCE" = true ] || ! [ -f "../speakers/embedding.onnx" ]; then + wget -q -O ../speakers/embedding.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + fi + echo "Speaker diarization models downloaded and set up." +else + echo "Speaker diarization models already exist. Skipping download. Use --force to re-download." +fi + +# Download speech enhancement model +echo "5. Setting up Speech Enhancement Models..." +if [ "$FORCE" = true ] || ! [ -f "../enhancement/gtcrn.onnx" ]; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../enhancement" ]; then + rm -rf "../enhancement" + mkdir -p "../enhancement" + fi + + wget -q -O ../enhancement/gtcrn.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx + echo "Speech enhancement model downloaded and set up." +else + echo "Speech enhancement model already exists. Skipping download. Use --force to re-download." +fi + +# Download keyword spotting models +echo "6. Setting up Keyword Spotting Models..." 
+if [ "$FORCE" = true ] || ! check_dir_not_empty "../kws"; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../kws" ]; then + rm -rf "../kws" + mkdir -p "../kws" + fi + + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + + mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/encoder.onnx + mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/decoder.onnx + mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/joiner.onnx + mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ../kws/tokens.txt + rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 + echo "Keyword spotting models downloaded and set up." +else + echo "Keyword spotting models already exist. Skipping download. Use --force to re-download." +fi + +# Clean up tmp directory +cd .. +rm -rf tmp + +echo "" +echo "===== All assets have been downloaded and set up successfully! =====" +echo "" +echo "To run the demo:" +echo "1. Build the WASM module: ./build-wasm-combined.sh" +echo "2. Start a local server: cd ../.. && python3 -m http.server 8080" +echo "3. Open your browser and go to: http://localhost:8080/wasm/combined/" +echo "" \ No newline at end of file diff --git a/wasm/combined/copy_with_confirm.cmake b/wasm/combined/copy_with_confirm.cmake new file mode 100644 index 0000000000..1d87e87150 --- /dev/null +++ b/wasm/combined/copy_with_confirm.cmake @@ -0,0 +1,74 @@ +# This script copies files with confirmation for overwriting +# It is specifically used for the WASM combined build process in wasm/combined +# and should be kept in the wasm/combined directory. 
+ +# Expected variables: +# SRC_DIR - source directory +# DEST_DIR - destination directory +# COPY_FILES - semicolon-separated list of files to copy + +# Print debug information +message(STATUS "Source directory: ${SRC_DIR}") +message(STATUS "Destination directory: ${DEST_DIR}") +message(STATUS "Files to copy: ${COPY_FILES}") + +# Verify source directory exists +if(NOT EXISTS "${SRC_DIR}") + message(FATAL_ERROR "Source directory does not exist: ${SRC_DIR}") +endif() + +# Verify destination directory exists or create it +if(NOT EXISTS "${DEST_DIR}") + message(STATUS "Creating destination directory: ${DEST_DIR}") + file(MAKE_DIRECTORY "${DEST_DIR}") +endif() + +# List source directory contents for debugging +message(STATUS "Contents of source directory:") +file(GLOB source_files "${SRC_DIR}/*") +foreach(file ${source_files}) + message(STATUS " ${file}") +endforeach() + +# Process each file (just one file in each call now) +foreach(file_name ${COPY_FILES}) + # Remove quotes if present + string(REGEX REPLACE "^\"(.*)\"$" "\\1" file_name "${file_name}") + + set(src_file "${SRC_DIR}/${file_name}") + set(dest_file "${DEST_DIR}/${file_name}") + + message(STATUS "Processing file: ${file_name}") + message(STATUS "Source file path: ${src_file}") + message(STATUS "Destination file path: ${dest_file}") + + # Verify source file exists + if(NOT EXISTS "${src_file}") + message(FATAL_ERROR "Source file does not exist: ${src_file}") + endif() + + # Check if the destination file exists + if(EXISTS "${dest_file}") + message(STATUS "File ${file_name} already exists in ${DEST_DIR}") + # Prompt for confirmation (this will be shown in terminal) + message(STATUS "Do you want to overwrite? [y/N]") + + # Read user input (works in interactive mode) + execute_process( + COMMAND ${CMAKE_COMMAND} -E echo_append "" + COMMAND /bin/bash -c "read -n 1 answer && echo $answer" + OUTPUT_VARIABLE answer + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + if("${answer}" STREQUAL "y" OR "${answer}" STREQUAL "Y") + message(STATUS "Overwriting ${dest_file}") + file(COPY "${src_file}" DESTINATION "${DEST_DIR}") + else() + message(STATUS "Skipping ${file_name}") + endif() + else() + message(STATUS "Copying ${file_name} to ${DEST_DIR}") + file(COPY "${src_file}" DESTINATION "${DEST_DIR}") + endif() +endforeach() \ No newline at end of file diff --git a/wasm/combined/demos/asr.html b/wasm/combined/demos/asr.html new file mode 100644 index 0000000000..355963c3af --- /dev/null +++ b/wasm/combined/demos/asr.html @@ -0,0 +1,324 @@ + + + + + + Sherpa-ONNX ASR Demo + + + + + + + + + + + + +

+<!-- asr.html markup is not preserved in this excerpt; the page's visible text includes:
+     "Sherpa-ONNX ASR Demo", "Loading WebAssembly module...",
+     "Automatic Speech Recognition (ASR)", "Model Configuration",
+     "(The directory where model files will be stored)", "Status: Not active" -->
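What the stripped markup cannot show is how the ASR page pushes microphone audio through the engine. The sketch below is not the page's actual code; it only uses functions from the exported-functions list in `wasm/combined/CMakeLists.txt`, assumes `recognizer` and `stream` pointers were already created via `SherpaOnnxCreateOnlineRecognizer` / `SherpaOnnxCreateOnlineStream`, and makes explicit the `Module.HEAPF32` write that `ISSUE.md` identifies as the failure point:

```js
// Sketch only: `recognizer` and `stream` are pointers obtained earlier from
// SherpaOnnxCreateOnlineRecognizer / SherpaOnnxCreateOnlineStream.
function feedChunk(Module, recognizer, stream, samples /* Float32Array */, sampleRate) {
  const bytes = samples.length * samples.BYTES_PER_ELEMENT;
  const ptr = Module._malloc(bytes);
  // The HEAPF32 write that ISSUE.md describes as failing across script contexts.
  Module.HEAPF32.set(samples, ptr / 4);

  Module.ccall('SherpaOnnxOnlineStreamAcceptWaveform', null,
               ['number', 'number', 'number', 'number'],
               [stream, sampleRate, ptr, samples.length]);
  Module._free(ptr);

  // Decode whatever is ready, then read the partial result as JSON.
  while (Module.ccall('SherpaOnnxIsOnlineStreamReady', 'number',
                      ['number', 'number'], [recognizer, stream])) {
    Module.ccall('SherpaOnnxDecodeOnlineStream', null,
                 ['number', 'number'], [recognizer, stream]);
  }

  const jsonPtr = Module.ccall('SherpaOnnxGetOnlineStreamResultAsJson', 'number',
                               ['number', 'number'], [recognizer, stream]);
  const result = JSON.parse(Module.UTF8ToString(jsonPtr));
  Module.ccall('SherpaOnnxDestroyOnlineStreamResultJson', null, ['number'], [jsonPtr]);
  return result.text;
}
```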
+ + + + diff --git a/wasm/combined/demos/assets b/wasm/combined/demos/assets new file mode 120000 index 0000000000..ec2e4be2f8 --- /dev/null +++ b/wasm/combined/demos/assets @@ -0,0 +1 @@ +../assets \ No newline at end of file diff --git a/wasm/combined/demos/common.css b/wasm/combined/demos/common.css new file mode 100644 index 0000000000..a06a3fc5bb --- /dev/null +++ b/wasm/combined/demos/common.css @@ -0,0 +1,372 @@ +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + max-width: 900px; + margin: 0 auto; + padding: 20px; + background-color: #f5f5f5; +} +h1, h2, h3 { + color: #333; +} +section { + margin: 20px 0; + padding: 15px; + border: 1px solid #ccc; + border-radius: 8px; + background-color: #fff; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); +} +.controls { + display: flex; + flex-wrap: wrap; + gap: 10px; + margin-bottom: 10px; +} +.controls input { + flex-grow: 1; + padding: 8px; + border: 1px solid #ccc; + border-radius: 4px; +} +button { + padding: 8px 16px; + background-color: #4285f4; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; + transition: background-color 0.3s; +} +button:hover { + background-color: #3367d6; +} +button:disabled { + background-color: #ccc; + cursor: not-allowed; +} +textarea { + width: 100%; + height: 100px; + padding: 8px; + border: 1px solid #ccc; + border-radius: 4px; + resize: vertical; +} +.loading { + display: none; +} +#status { + padding: 10px; + border-radius: 4px; + background-color: #e0f7fa; + margin-bottom: 20px; +} +.model-url { + width: 100%; + margin-bottom: 5px; +} +.module-info { + margin-top: 20px; + padding: 15px; + background-color: #f1f8e9; + border-radius: 4px; +} +.form-group { + margin-bottom: 12px; +} +.form-group label { + display: block; + margin-bottom: 5px; + font-weight: 500; +} +.form-group input[type="text"] { + width: 100%; + padding: 8px; + border: 1px solid #ccc; + border-radius: 4px; + box-sizing: border-box; +} +.form-group small { + display: block; + color: #666; + margin-top: 4px; + font-size: 0.85em; +} +.form-check { + margin-bottom: 8px; +} +.form-check input[type="checkbox"] { + margin-right: 8px; +} +select { + width: 100%; + padding: 8px; + border: 1px solid #ccc; + border-radius: 4px; + box-sizing: border-box; + margin-bottom: 8px; +} +.result-box { + min-height: 100px; + max-height: 200px; + overflow-y: auto; + border: 1px solid #ccc; + border-radius: 4px; + padding: 10px; + margin-top: 10px; + background-color: #f9f9f9; +} +.audio-output { + margin-top: 15px; +} +.audio-item { + display: flex; + align-items: center; + padding: 10px; + border: 1px solid #ddd; + border-radius: 4px; + margin-bottom: 10px; + background-color: #f9f9f9; +} +.audio-item audio { + margin-right: 10px; +} +.audio-item .audio-text { + flex-grow: 1; +} +.audio-item .delete-btn { + background-color: #f44336; + color: white; + border: none; + border-radius: 4px; + padding: 5px 10px; + cursor: pointer; +} +.unload-button { + background-color: #f44336; + color: white; +} +.unload-button:hover { + background-color: #d32f2f; +} +.nav-menu { + display: flex; + gap: 10px; + margin-bottom: 20px; + padding: 10px; + background-color: #333; + border-radius: 8px; +} +.nav-menu a { + color: white; + text-decoration: none; + padding: 5px 10px; + border-radius: 4px; +} +.nav-menu a:hover { + background-color: #555; +} +.nav-menu a.active { + background-color: #4285f4; +} + +/* Filesystem Validation Styles */ +#fs-validation-section { + margin-top: 30px; + border-top: 1px solid #ddd; + padding-top: 20px; +} + 
+.inspect-button { + background-color: #009688; +} + +.inspect-button:hover { + background-color: #00796b; +} + +.filesystem-inspector { + font-family: monospace; + background-color: #f5f5f5; + border: 1px solid #ddd; + border-radius: 4px; + padding: 10px; + margin: 10px 0; + max-height: 400px; + overflow-y: auto; +} + +.fs-section { + margin-bottom: 15px; + padding: 8px; + border-bottom: 1px dashed #ccc; +} + +.fs-section:last-child { + border-bottom: none; +} + +.fs-section h3 { + margin: 0 0 10px 0; + font-size: 1.1em; + color: #333; + background-color: #e0e0e0; + padding: 5px; + border-radius: 3px; +} + +.path-info { + margin: 8px 0; + padding: 5px 0 5px 10px; + border-left: 3px solid #4285f4; +} + +.error { + color: #cc0000; + font-weight: bold; + padding: 5px; + background-color: #ffeeee; + border-radius: 3px; +} + +.warning { + color: #cc7700; + padding: 5px; + background-color: #fff8e1; + border-radius: 3px; +} + +.filesystem-inspector ul { + margin: 5px 0; + padding-left: 20px; + list-style-type: square; +} + +.filesystem-inspector li { + padding: 2px 0; +} + +.info-box { + background-color: #e8f5e9; + border-left: 4px solid #4caf50; + padding: 12px 15px; + margin-bottom: 20px; + border-radius: 4px; +} + +.info-box p { + margin: 5px 0; + color: #2e7d32; +} + +.info-box code { + background-color: rgba(0, 0, 0, 0.06); + border-radius: 3px; + padding: 2px 4px; + font-family: monospace; +} + +.debug-button { + background-color: #673ab7; +} + +.debug-button:hover { + background-color: #512da8; +} + +#debug-console { + margin-top: 20px; + border: 1px solid #ccc; + border-radius: 4px; + padding: 10px; + background-color: #f8f8f8; +} + +.debug-log { + height: 300px; + overflow-y: auto; + font-family: monospace; + background-color: #1e1e1e; + color: #d4d4d4; + padding: 10px; + border-radius: 4px; + margin: 10px 0; + white-space: pre-wrap; + word-wrap: break-word; +} + +.log-entry { + margin-bottom: 5px; + border-bottom: 1px solid #333; + padding-bottom: 5px; +} + +.log-entry.error { + color: #f44336; + font-weight: bold; +} + +/* Tabs styling */ +.tabs { + display: flex; + margin-bottom: 20px; + border-bottom: 1px solid #ccc; +} + +.tab-button { + padding: 10px 20px; + background-color: #f0f0f0; + border: none; + border-radius: 4px 4px 0 0; + cursor: pointer; + margin-right: 5px; + color: #333; + font-weight: 500; +} + +.tab-button:hover { + background-color: #e0e0e0; +} + +.tab-button.active { + background-color: #4285f4; + color: white; +} + +.tab-content { + display: none; + padding: 15px 0; +} + +.tab-content.active { + display: block; +} + +/* Range slider styling */ +input[type="range"] { + width: 100%; + margin: 5px 0; +} + +input[type="range"] + span { + display: inline-block; + width: 40px; + text-align: center; + font-weight: bold; + color: #4285f4; +} + +/* Model parameters styling */ +.model-params { + background-color: #f9f9f9; + padding: 15px; + border-radius: 4px; + margin-bottom: 15px; +} + +#model-config h3 { + margin-top: 0; + margin-bottom: 15px; + padding-bottom: 8px; + border-bottom: 1px solid #e0e0e0; +} diff --git a/wasm/combined/demos/common.js b/wasm/combined/demos/common.js new file mode 100644 index 0000000000..05c06adfb0 --- /dev/null +++ b/wasm/combined/demos/common.js @@ -0,0 +1,291 @@ +// common.js - Shared utilities for Sherpa-ONNX WASM demos + +// --- Emscripten Module configuration --- +// This MUST run before sherpa-onnx-wasm-combined.js is loaded +var Module = Module || {}; +Module.locateFile = function(path, prefix) { + // If the path is the wasm or 
data file, load it from the parent directory + if (path.endsWith('.wasm') || path.endsWith('.data')) { + // Assumes demos are in a subdirectory (like /demos/) + // Adjust this path if your structure is different + return `../${path}`; + } + // Otherwise, use the default logic (usually prefix + path) + return prefix + path; +}; +// --- End Emscripten Module configuration --- + +// Set up initialization callback +window.onSherpaOnnxReady = function(success, error) { + if (success) { + console.log("All SherpaOnnx modules loaded successfully"); + initializeUI(); // This function would be defined in each individual demo file + } else { + console.error("Some SherpaOnnx modules failed to load:", error); + document.getElementById('status').textContent = + "Error loading some modules. Some features may not work correctly."; + document.getElementById('status').style.backgroundColor = "#ffcccc"; + + // Still try to initialize the UI with available modules + initializeUI(); + } +}; + +// Old-style module initialization for backward compatibility +window.onModuleReady = function() { + console.log("WASM module ready - waiting for all JS modules to load"); +}; + +// Shared audio context and microphone access +let audioContext; +let mediaStream; + +function setupAudioContext() { + if (!audioContext) { + audioContext = new (window.AudioContext || window.webkitAudioContext)({sampleRate: 16000}); + } + return audioContext; +} + +async function getMicrophoneInput() { + try { + const stream = await navigator.mediaDevices.getUserMedia({audio: true}); + const context = setupAudioContext(); + mediaStream = context.createMediaStreamSource(stream); + return stream; + } catch (error) { + console.error('Error accessing microphone:', error); + throw error; + } +} + +// Create unload button +function createUnloadButton(container, modelType, resource, statusElem) { + const button = document.createElement('button'); + button.textContent = `Unload ${modelType} Model`; + button.classList.add('unload-button'); + + button.addEventListener('click', function() { + if (resource) { + // Free the resource + resource.free(); + + // Call the appropriate cleanup method + if (modelType === 'ASR') { + SherpaOnnx.cleanupASR(); + } else if (modelType === 'TTS') { + SherpaOnnx.cleanupTTS(); + } else if (modelType === 'VAD') { + SherpaOnnx.cleanupVAD(); + } else if (modelType === 'KWS') { + SherpaOnnx.cleanupKWS(); + } + + // Update UI + button.disabled = true; + if (statusElem) { + statusElem.textContent = `Status: ${modelType} model unloaded`; + } + + console.log(`${modelType} model unloaded successfully`); + } + }); + + container.appendChild(button); + return button; +} + +// Validate WASM filesystem assets +function validateAssets(targetElement, moduleTypes = ['vad', 'tts', 'asr', 'kws']) { + if (!window.SherpaOnnx || !window.SherpaOnnx.FileSystem) { + targetElement.innerHTML = '
SherpaOnnx FileSystem not available
'; + return false; + } + + const fs = window.SherpaOnnx.FileSystem; + const container = document.createElement('div'); + container.className = 'filesystem-inspector'; + + // Check root directories + const rootSection = document.createElement('div'); + rootSection.className = 'fs-section'; + rootSection.innerHTML = '

Root Directory

'; + + try { + const rootFiles = fs.listFiles('/'); + + if (rootFiles.length === 0) { + rootSection.innerHTML += '
No files found in root directory
'; + } else { + const rootList = document.createElement('ul'); + rootFiles.forEach(file => { + const item = document.createElement('li'); + item.textContent = file; + rootList.appendChild(item); + }); + rootSection.appendChild(rootList); + } + } catch (e) { + rootSection.innerHTML += `
Error listing root directory: ${e.message}
`; + } + + container.appendChild(rootSection); + + // Check each module type + moduleTypes.forEach(moduleType => { + const section = document.createElement('div'); + section.className = 'fs-section'; + section.innerHTML = `

${moduleType.toUpperCase()} Assets

`; + + // Check various possible asset paths - updated to include sherpa_assets + const assetPaths = [ + `/sherpa_assets/${moduleType}`, // Added - This is the correct path per CMakeLists.txt + `/assets/${moduleType}`, + `/assets/${moduleType}/models`, + `/preloaded/${moduleType}` + ]; + + let assetsFound = false; + + assetPaths.forEach(assetPath => { + if (fs.fileExists(assetPath)) { + assetsFound = true; + const files = fs.listFiles(assetPath); + + const pathDiv = document.createElement('div'); + pathDiv.className = 'path-info'; + pathDiv.innerHTML = `${assetPath}:`; + + if (files.length === 0) { + pathDiv.innerHTML += ' Directory exists but is empty'; + } else { + const fileList = document.createElement('ul'); + files.forEach(file => { + const item = document.createElement('li'); + item.textContent = file; + fileList.appendChild(item); + }); + pathDiv.appendChild(fileList); + } + + section.appendChild(pathDiv); + } + }); + + if (!assetsFound) { + section.innerHTML += `
No ${moduleType} asset directories found
`; + } + + container.appendChild(section); + }); + + // Also check if sherpa_assets directory exists + try { + if (fs.fileExists('/sherpa_assets')) { + const sherpaSection = document.createElement('div'); + sherpaSection.className = 'fs-section'; + sherpaSection.innerHTML = '

Sherpa Assets Directory

'; + + const sherpaFiles = fs.listFiles('/sherpa_assets'); + if (sherpaFiles.length === 0) { + sherpaSection.innerHTML += '
Directory exists but is empty
'; + } else { + const pathDiv = document.createElement('div'); + pathDiv.className = 'path-info'; + pathDiv.innerHTML = '/sherpa_assets:'; + + const fileList = document.createElement('ul'); + sherpaFiles.forEach(file => { + const item = document.createElement('li'); + item.textContent = file; + + // Recursively show contents for each subdirectory + if (fs.fileExists(`/sherpa_assets/${file}`)) { + try { + const subFiles = fs.listFiles(`/sherpa_assets/${file}`); + if (subFiles.length > 0) { + const subList = document.createElement('ul'); + subFiles.forEach(subFile => { + const subItem = document.createElement('li'); + subItem.textContent = subFile; + subList.appendChild(subItem); + }); + item.appendChild(subList); + } + } catch (e) { + // Ignore errors for subdir listing + } + } + + fileList.appendChild(item); + }); + + pathDiv.appendChild(fileList); + sherpaSection.appendChild(pathDiv); + } + + container.appendChild(sherpaSection); + } + } catch (e) { + // Ignore errors if sherpa_assets doesn't exist + } + + // Clear and update target element + targetElement.innerHTML = ''; + targetElement.appendChild(container); + + // Add some basic styling + const style = document.createElement('style'); + style.textContent = ` + .filesystem-inspector { + font-family: monospace; + background-color: #f5f5f5; + border: 1px solid #ddd; + border-radius: 4px; + padding: 10px; + margin: 10px 0; + max-height: 400px; + overflow-y: auto; + } + .fs-section { + margin-bottom: 15px; + } + .fs-section h3 { + margin: 0 0 5px 0; + font-size: 1em; + color: #333; + } + .path-info { + margin: 5px 0; + padding-left: 10px; + } + .error { + color: #cc0000; + font-weight: bold; + } + .warning { + color: #cc7700; + } + ul { + margin: 5px 0; + padding-left: 20px; + } + `; + targetElement.appendChild(style); + + return true; +} + +// Create inspect assets button +function createInspectAssetsButton(container, targetElement) { + const button = document.createElement('button'); + button.textContent = 'Inspect Filesystem Assets'; + button.classList.add('inspect-button'); + + button.addEventListener('click', function() { + validateAssets(targetElement); + }); + + container.appendChild(button); + return button; +} diff --git a/wasm/combined/demos/index.html b/wasm/combined/demos/index.html new file mode 100644 index 0000000000..044250c2b9 --- /dev/null +++ b/wasm/combined/demos/index.html @@ -0,0 +1,76 @@ + + + + + + Sherpa-ONNX Demos + + + + + + + + + + + +

+<!-- index.html markup is not preserved in this excerpt; the page's visible text includes:
+     "Sherpa-ONNX Demos",
+     "These demos showcase the modular capabilities of Sherpa-ONNX WebAssembly",
+     "Available Demos", "About These Demos",
+     "Each demo uses the unified Sherpa-ONNX combined library but focuses on a single
+      module for easier testing and validation.",
+     "The combined approach allows using just the modules you need in your own applications.",
+     "Memory Management",
+     "Each demo includes an 'Unload Model' button to demonstrate proper memory management:
+      explicit unloading frees WASM memory; resources are tracked and properly cleaned up;
+      prevents memory leaks in long-running applications",
+     "WASM Filesystem Validation",
+     "Use this tool to verify that assets are correctly loaded in the WASM virtual filesystem." -->
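The "Unload Model" behaviour listed above maps onto the pattern in `demos/common.js` (`createUnloadButton`): free the wrapper object, then call the per-module cleanup hook. A condensed sketch of that pattern, using only names that appear in `common.js`:

```js
// Condensed from createUnloadButton() in demos/common.js.
function unloadModel(modelType, resource, statusElem) {
  if (!resource) return;
  resource.free();  // release the recognizer / tts / vad / kws handle

  // Per-module cleanup hooks defined by the combined wrapper scripts.
  const cleanup = {
    ASR: SherpaOnnx.cleanupASR,
    TTS: SherpaOnnx.cleanupTTS,
    VAD: SherpaOnnx.cleanupVAD,
    KWS: SherpaOnnx.cleanupKWS,
  }[modelType];
  if (cleanup) cleanup();

  if (statusElem) statusElem.textContent = `Status: ${modelType} model unloaded`;
}
```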
+ + + + diff --git a/wasm/combined/demos/kws.html b/wasm/combined/demos/kws.html new file mode 100644 index 0000000000..4400c8ce7c --- /dev/null +++ b/wasm/combined/demos/kws.html @@ -0,0 +1,231 @@ + + + + + + Sherpa-ONNX KWS Demo + + + + + + + + + + + + +

+<!-- kws.html markup is not preserved in this excerpt; the page's visible text includes:
+     "Sherpa-ONNX KWS Demo", "Loading WebAssembly module...", "Keyword Spotting (KWS)",
+     "Model Configuration", "(The directory where model files will be stored)",
+     "Format: Phonetic tokens with spaces between letters, followed by @ and the keyword label",
+     "Status: Not active" -->
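For the KWS page, the polling loop against the exported C functions would look roughly like the sketch below. It assumes `spotter` and `stream` pointers already exist (from `SherpaOnnxCreateKeywordSpotter` / `SherpaOnnxCreateKeywordStream`) and that the result struct's first member is the detected keyword string, as in the sherpa-onnx C API:

```js
// Sketch only: `spotter` and `stream` are pointers created earlier.
function pollKeywords(Module, spotter, stream) {
  const hits = [];
  while (Module.ccall('SherpaOnnxIsKeywordStreamReady', 'number',
                      ['number', 'number'], [spotter, stream])) {
    Module.ccall('SherpaOnnxDecodeKeywordStream', null,
                 ['number', 'number'], [spotter, stream]);

    const resultPtr = Module.ccall('SherpaOnnxGetKeywordResult', 'number',
                                   ['number', 'number'], [spotter, stream]);
    // Assumed layout: first field of the result struct is the keyword (char*).
    const keyword = Module.UTF8ToString(Module.getValue(resultPtr, 'i8*'));
    Module.ccall('SherpaOnnxDestroyKeywordResult', null, ['number'], [resultPtr]);

    if (keyword) {
      hits.push(keyword);
      // Reset the stream so the same keyword can fire again later.
      Module.ccall('SherpaOnnxResetKeywordStream', null,
                   ['number', 'number'], [spotter, stream]);
    }
  }
  return hits;
}
```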
+ + + + diff --git a/wasm/combined/demos/tts.html b/wasm/combined/demos/tts.html new file mode 100644 index 0000000000..01dab9660f --- /dev/null +++ b/wasm/combined/demos/tts.html @@ -0,0 +1,653 @@ + + + + + + Sherpa-ONNX TTS Demo + + + + + + + + + + + + +

+<!-- tts.html markup is not preserved in this excerpt; the page's visible text includes:
+     "Sherpa-ONNX TTS Demo", "Loading WebAssembly module...", "Text-to-Speech (TTS)",
+     "This demo uses either the preloaded TTS models from the /sherpa_assets/tts directory
+      in the WASM filesystem or a custom model you upload.",
+     "Use the 'Inspect Filesystem Assets' button below to verify available models.",
+     "Use the preloaded VITS model included with Sherpa-ONNX.",
+     "The archive should contain the model file (.onnx), tokens.txt, and other required files.",
+     "Status: Not active", "WASM Filesystem Validation",
+     "Use this tool to verify that TTS assets are correctly loaded in the WASM virtual filesystem." -->
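ISSUE.md observes that TTS is less affected because its interaction pattern does not depend on JavaScript-side writes into `HEAPF32` the way streaming ASR does. A rough sketch of the generation and readback path (not the page's actual code) is below; it assumes `tts` is a pointer from `SherpaOnnxCreateOfflineTts` and that the returned struct follows the sherpa-onnx C API layout for `SherpaOnnxGeneratedAudio` (`float* samples`, `int32 n`, `int32 sample_rate`):

```js
// Sketch only: `tts` is a pointer returned by SherpaOnnxCreateOfflineTts.
function synthesize(Module, tts, text, speakerId = 0, speed = 1.0) {
  const audioPtr = Module.ccall('SherpaOnnxOfflineTtsGenerate', 'number',
                                ['number', 'string', 'number', 'number'],
                                [tts, text, speakerId, speed]);

  // Assumed SherpaOnnxGeneratedAudio layout on wasm32: samples*, n, sample_rate.
  const samplesPtr = Module.getValue(audioPtr, 'i8*');
  const numSamples = Module.getValue(audioPtr + 4, 'i32');
  const sampleRate = Module.getValue(audioPtr + 8, 'i32');

  // Copy out of WASM memory before freeing the C-side buffer.
  const samples = new Float32Array(
      Module.HEAPF32.subarray(samplesPtr / 4, samplesPtr / 4 + numSamples));
  Module.ccall('SherpaOnnxDestroyOfflineTtsGeneratedAudio', null, ['number'], [audioPtr]);

  return { samples, sampleRate };
}
```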
+ + + + diff --git a/wasm/combined/demos/vad.html b/wasm/combined/demos/vad.html new file mode 100644 index 0000000000..29b82c16fd --- /dev/null +++ b/wasm/combined/demos/vad.html @@ -0,0 +1,280 @@ + + + + + + Sherpa-ONNX VAD Demo + + + + + + + + + + + + +

+<!-- vad.html markup is not preserved in this excerpt; the page's visible text includes:
+     "Sherpa-ONNX VAD Demo", "Loading WebAssembly module...", "Voice Activity Detection (VAD)",
+     "Model Configuration", "(The directory where model files will be stored)",
+     "VAD Parameters" (three sliders with defaults 0.5, 0.3, 0.1; labels not preserved),
+     "Status: Not active", "Voice Activity Level" -->
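For the VAD page, the chunk-processing loop against the exported C functions would look roughly as follows. `vad` is assumed to come from `SherpaOnnxCreateVoiceActivityDetector`, and the speech-segment layout (`int32 start`, `float* samples`, `int32 n`) follows the sherpa-onnx C API; like the ASR path, it needs a JavaScript-side `HEAPF32` write:

```js
// Sketch only: `vad` is a pointer from SherpaOnnxCreateVoiceActivityDetector.
function processVadChunk(Module, vad, chunk /* Float32Array, 16 kHz mono */) {
  const ptr = Module._malloc(chunk.length * 4);
  Module.HEAPF32.set(chunk, ptr / 4);
  Module.ccall('SherpaOnnxVoiceActivityDetectorAcceptWaveform', null,
               ['number', 'number', 'number'], [vad, ptr, chunk.length]);
  Module._free(ptr);

  const segments = [];
  while (Module.ccall('SherpaOnnxVoiceActivityDetectorDetected', 'number',
                      ['number'], [vad])) {
    const segPtr = Module.ccall('SherpaOnnxVoiceActivityDetectorFront', 'number',
                                ['number'], [vad]);
    // Assumed SherpaOnnxSpeechSegment layout on wasm32: start, samples*, n.
    const start = Module.getValue(segPtr, 'i32');
    const samplesPtr = Module.getValue(segPtr + 4, 'i8*');
    const n = Module.getValue(segPtr + 8, 'i32');
    segments.push({
      start,
      samples: new Float32Array(
          Module.HEAPF32.subarray(samplesPtr / 4, samplesPtr / 4 + n)),
    });
    Module.ccall('SherpaOnnxDestroySpeechSegment', null, ['number'], [segPtr]);
    Module.ccall('SherpaOnnxVoiceActivityDetectorPop', null, ['number'], [vad]);
  }
  return segments;
}
```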
+ + + + diff --git a/wasm/combined/sherpa-onnx-combined-asr.js b/wasm/combined/sherpa-onnx-combined-asr.js new file mode 100644 index 0000000000..84ec2a487e --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined-asr.js @@ -0,0 +1,1171 @@ +/** + * sherpa-onnx-asr.js + * + * Automatic Speech Recognition functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing ASR namespace + SherpaOnnx.ASR = SherpaOnnx.ASR || {}; + + // Add readiness promise for WebAssembly module + SherpaOnnx.ASR.ready = new Promise((resolve, reject) => { + console.log('Waiting for SherpaOnnx core module initialization...'); + let attempt = 0; + const checkInterval = setInterval(() => { + attempt++; + console.log(`Attempt ${attempt}: Checking SherpaOnnx readiness status...`); + console.log(`SherpaOnnx.isReady: ${!!SherpaOnnx.isReady}`); + console.log(`window.Module exists: ${!!window.Module}`); + if (window.Module) { + console.log(`window.Module.calledRun: ${!!window.Module.calledRun}`); + console.log(`window.Module.HEAPF32 exists: ${!!window.Module.HEAPF32}`); + console.log(`window.Module properties:`, Object.keys(window.Module).slice(0, 10), `... (first 10 shown)`); + // Enhanced workaround for HEAPF32 not available + if (!window.Module.HEAPF32) { + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Successfully initialized HEAPF32 dynamically from HEAP8.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Successfully initialized HEAPF32 directly from WebAssembly memory.'); + } else if (window.Module.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.memory.buffer); + console.log('Successfully initialized HEAPF32 from Module.memory.'); + } else if (window.Module._memory) { + window.Module.HEAPF32 = new Float32Array(window.Module._memory.buffer); + console.log('Successfully initialized HEAPF32 from Module._memory.'); + } else if (typeof WebAssembly !== 'undefined' && WebAssembly.Memory && window.Module.asm) { + // Attempt to find memory instance in WebAssembly runtime + for (const prop in window.Module.asm) { + if (window.Module.asm[prop] instanceof WebAssembly.Memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm[prop].buffer); + console.log(`Successfully initialized HEAPF32 from WebAssembly.Memory found in asm.${prop}.`); + break; + } + } + if (!window.Module.HEAPF32) { + console.warn('No WebAssembly.Memory found in asm properties.'); + } + } else { + console.warn('No suitable method found to initialize HEAPF32. Logging detailed Module info for debugging.'); + // Log detailed information about Module for debugging + console.log('Detailed Module properties:', Object.keys(window.Module)); + if (window.Module.asm) { + console.log('Module.asm properties:', Object.keys(window.Module.asm).slice(0, 10), '... 
(first 10 shown)'); + } + } + } catch (e) { + console.error('Failed to initialize HEAPF32 dynamically:', e); + } + // Log post-initialization status + console.log(`Post-workaround - window.Module.HEAPF32 exists: ${!!window.Module.HEAPF32}`); + } + } + if (SherpaOnnx.isReady || (window.Module && window.Module.calledRun)) { + console.log('Proceeding with ASR initialization. SherpaOnnx core module is ready or Module.calledRun is true.'); + SherpaOnnx.isReady = true; // Force set readiness flag + console.log('SherpaOnnx readiness flag manually set to true in ASR module.'); + clearInterval(checkInterval); + resolve(window.Module); + } else { + console.log('Still waiting for SherpaOnnx core module...'); + } + }, 500); + setTimeout(() => { + clearInterval(checkInterval); + console.error('SherpaOnnx core module initialization timed out after 60 seconds. Proceeding anyway if Module exists.'); + if (window.Module) { + // Enhanced workaround for HEAPF32 not available on timeout + if (!window.Module.HEAPF32) { + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Successfully initialized HEAPF32 dynamically from HEAP8 on timeout.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Successfully initialized HEAPF32 directly from WebAssembly memory on timeout.'); + } else if (window.Module.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.memory.buffer); + console.log('Successfully initialized HEAPF32 from Module.memory on timeout.'); + } else if (window.Module._memory) { + window.Module.HEAPF32 = new Float32Array(window.Module._memory.buffer); + console.log('Successfully initialized HEAPF32 from Module._memory on timeout.'); + } else if (typeof WebAssembly !== 'undefined' && WebAssembly.Memory && window.Module.asm) { + // Attempt to find memory instance in WebAssembly runtime + for (const prop in window.Module.asm) { + if (window.Module.asm[prop] instanceof WebAssembly.Memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm[prop].buffer); + console.log(`Successfully initialized HEAPF32 from WebAssembly.Memory found in asm.${prop} on timeout.`); + break; + } + } + if (!window.Module.HEAPF32) { + console.warn('No WebAssembly.Memory found in asm properties on timeout.'); + } + } else { + console.warn('No suitable method found to initialize HEAPF32 on timeout. Logging detailed Module info for debugging.'); + // Log detailed information about Module for debugging + console.log('Detailed Module properties on timeout:', Object.keys(window.Module)); + if (window.Module.asm) { + console.log('Module.asm properties on timeout:', Object.keys(window.Module.asm).slice(0, 10), '... 
(first 10 shown)'); + } + } + } catch (e) { + console.error('Failed to initialize HEAPF32 dynamically on timeout:', e); + } + // Log post-initialization status + console.log(`Post-workaround on timeout - window.Module.HEAPF32 exists: ${!!window.Module.HEAPF32}`); + } + SherpaOnnx.isReady = true; // Force set readiness flag on timeout + console.log('SherpaOnnx readiness flag manually set to true on timeout in ASR module.'); + resolve(window.Module); + } else { + reject(new Error('SherpaOnnx core module initialization timed out after 60 seconds and Module not found')); + } + }, 60000); + }); + + // Define the ASR module functionality + SherpaOnnx.ASR = { + /** + * Load an ASR model from URLs + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const debug = modelConfig.debug || false; + const modelDir = modelConfig.modelDir || 'asr-models'; + + // First check for preloaded assets + if (!modelConfig.forceDownload) { + const assetPath = SherpaOnnx.Config.assetPaths.asr; + if (debug) console.log(`Checking for preloaded ASR assets at ${assetPath}`); + + if (SherpaOnnx.FileSystem.fileExists(assetPath)) { + const files = SherpaOnnx.FileSystem.listFiles(assetPath); + if (debug) console.log(`Found preloaded files: ${files.join(', ')}`); + + // Check for model files based on type + if (modelConfig.type === 'transducer' || !modelConfig.type) { + if (files.includes('encoder.onnx') && + files.includes('decoder.onnx') && + files.includes('joiner.onnx') && + files.includes('tokens.txt')) { + if (debug) console.log("Using preloaded transducer model"); + return { + modelDir: assetPath, + type: 'transducer', + actualPaths: { + encoder: `${assetPath}/encoder.onnx`, + decoder: `${assetPath}/decoder.onnx`, + joiner: `${assetPath}/joiner.onnx`, + tokens: `${assetPath}/tokens.txt` + }, + preloaded: true + }; + } + } else if (modelConfig.type === 'ctc') { + if (files.includes('model.onnx') && files.includes('tokens.txt')) { + if (debug) console.log("Using preloaded CTC model"); + return { + modelDir: assetPath, + type: 'ctc', + actualPaths: { + model: `${assetPath}/model.onnx`, + tokens: `${assetPath}/tokens.txt` + }, + preloaded: true + }; + } + } else if (modelConfig.type === 'paraformer') { + if (files.includes('encoder.onnx') && + files.includes('decoder.onnx') && + files.includes('tokens.txt')) { + if (debug) console.log("Using preloaded paraformer model"); + return { + modelDir: assetPath, + type: 'paraformer', + actualPaths: { + encoder: `${assetPath}/encoder.onnx`, + decoder: `${assetPath}/decoder.onnx`, + tokens: `${assetPath}/tokens.txt` + }, + preloaded: true + }; + } + } + + if (debug) console.log("Preloaded ASR assets found but missing required files for model type"); + } + } + + // Create directory if it doesn't exist + try { + SherpaOnnx.FileSystem.ensureDirectory(modelDir); + } catch(e) { + console.error(`Failed to create directory ${modelDir}:`, e); + } + + // Collection for actual file paths + const actualPaths = {}; + + // Load model files based on type + if (modelConfig.type === 'transducer') { + const results = await Promise.all([ + SherpaOnnx.FileSystem.loadFile(modelConfig.encoder || 'assets/asr/encoder.onnx', `${modelDir}/encoder.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.decoder || 'assets/asr/decoder.onnx', `${modelDir}/decoder.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.joiner || 'assets/asr/joiner.onnx', `${modelDir}/joiner.onnx`, 
debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, debug) + ]); + + // Collect actual paths + actualPaths.encoder = results[0].path; + actualPaths.decoder = results[1].path; + actualPaths.joiner = results[2].path; + actualPaths.tokens = results[3].path; + + } else if (modelConfig.type === 'paraformer') { + const results = await Promise.all([ + SherpaOnnx.FileSystem.loadFile(modelConfig.encoder || 'assets/asr/encoder.onnx', `${modelDir}/encoder.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.decoder || 'assets/asr/decoder.onnx', `${modelDir}/decoder.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, debug) + ]); + + // Collect actual paths + actualPaths.encoder = results[0].path; + actualPaths.decoder = results[1].path; + actualPaths.tokens = results[2].path; + + } else if (modelConfig.type === 'ctc') { + const results = await Promise.all([ + SherpaOnnx.FileSystem.loadFile(modelConfig.model || 'assets/asr/model.onnx', `${modelDir}/model.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, debug) + ]); + + // Collect actual paths + actualPaths.model = results[0].path; + actualPaths.tokens = results[1].path; + } + + return { + modelDir, + type: modelConfig.type, + actualPaths + }; + }, + + /** + * Initialize online recognizer configuration in WASM + * @param {Object} config - ASR configuration + * @param {Object} Module - WebAssembly module + * @returns {number} - Pointer to the configuration in WASM + * @private + */ + _initOnlineRecognizerConfig: function(config, Module) { + if (!config) { + console.error('ASR config is null'); + return 0; + } + + try { + // Use window.Module instead of the parameter Module + const M = window.Module; + + // First, allocate all the strings we need + const allocatedStrings = {}; + + // Transducer model config + if (config.modelConfig.transducer) { + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.encoder, M); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.decoder, M); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.joiner, M); + } else { + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString('', M); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString('', M); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString('', M); + } + + // Paraformer model config + if (config.modelConfig.paraformer) { + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.encoder, M); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.decoder, M); + } else { + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString('', M); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString('', M); + } + + // Zipformer2 CTC model config + if (config.modelConfig.zipformer2Ctc) { + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString(config.modelConfig.zipformer2Ctc.model, M); + } else { + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString('', M); + } + + // Tokens, provider, model_type, modeling_unit, bpe_vocab + allocatedStrings.tokens = SherpaOnnx.Utils.allocateString(config.modelConfig.tokens, M); + allocatedStrings.provider = SherpaOnnx.Utils.allocateString(config.modelConfig.provider || 'cpu', M); + 
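+ // Note on the serialization below: the config is written field-by-field at
+ // hand-computed offsets, which assumes a wasm32 build (4-byte pointers and
+ // 4-byte ints/floats). The field order has to mirror the C config struct that
+ // SherpaOnnxCreateOnlineRecognizer expects, and the configSize allocated below
+ // must be at least as large as that struct.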
allocatedStrings.modelType = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + allocatedStrings.modelingUnit = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + allocatedStrings.bpeVocab = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + + // Token buffer is not used in JS API + allocatedStrings.tokensBuffer = SherpaOnnx.Utils.allocateString('', M); + + // Decoding method + allocatedStrings.decodingMethod = SherpaOnnx.Utils.allocateString(config.decodingMethod || 'greedy_search', M); + + // Hotwords + allocatedStrings.hotwordsFile = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + allocatedStrings.hotwordsBuffer = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + + // Rule FSTs and FARs + allocatedStrings.ruleFsts = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + allocatedStrings.ruleFars = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + + // Now allocate the main config structure + // Size needs to match the C structure size + const configSize = 200; // Adjust if needed to match C struct + const configPtr = M._malloc(configSize); + + // Set feat_config fields (Starts populating the allocated memory) + let offset = 0; + M.setValue(configPtr + offset, config.featConfig.sampleRate || 16000, 'i32'); + offset += 4; + M.setValue(configPtr + offset, config.featConfig.featureDim || 80, 'i32'); + offset += 4; + + // Set model_config fields - transducer + M.setValue(configPtr + offset, allocatedStrings.encoder.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.decoder.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.joiner.ptr, 'i8*'); + offset += 4; + + // Set model_config fields - paraformer + M.setValue(configPtr + offset, allocatedStrings.paraEncoder.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.paraDecoder.ptr, 'i8*'); + offset += 4; + + // Set model_config fields - zipformer2_ctc + M.setValue(configPtr + offset, allocatedStrings.zipformerModel.ptr, 'i8*'); + offset += 4; + + // Set remaining model_config fields + M.setValue(configPtr + offset, allocatedStrings.tokens.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, config.modelConfig.numThreads || 1, 'i32'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.provider.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, config.modelConfig.debug || 0, 'i32'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.modelType.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.modelingUnit.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.bpeVocab.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.tokensBuffer.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, 0, 'i32'); // tokens_buf_size + offset += 4; + + // Set recognizer config fields + M.setValue(configPtr + offset, allocatedStrings.decodingMethod.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, config.maxActivePaths || 4, 'i32'); + offset += 4; + M.setValue(configPtr + offset, config.enableEndpoint || 1, 'i32'); + offset += 4; + M.setValue(configPtr + offset, config.rule1MinTrailingSilence || 2.4, 'float'); + offset += 4; + M.setValue(configPtr + offset, config.rule2MinTrailingSilence || 1.2, 'float'); + offset += 4; + M.setValue(configPtr + offset, config.rule3MinUtteranceLength || 300, 'float'); + offset += 4; + + // Set hotwords fields + 
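+ // The hotwords, CTC FST decoder, rule FST/FAR, blank-penalty and hotwords-buffer
+ // fields written below are left as empty strings / zeros because this wrapper
+ // does not expose them.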
M.setValue(configPtr + offset, allocatedStrings.hotwordsFile.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, 0.0, 'float'); // hotwords_score + offset += 4; + + // Set CTC FST decoder config - graph and max_active + M.setValue(configPtr + offset, 0, 'i8*'); // graph + offset += 4; + M.setValue(configPtr + offset, 0, 'i32'); // max_active + offset += 4; + + // Set rule FSTs and FARs + M.setValue(configPtr + offset, allocatedStrings.ruleFsts.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, allocatedStrings.ruleFars.ptr, 'i8*'); + offset += 4; + + // Set blank penalty + M.setValue(configPtr + offset, 0.0, 'float'); // blank_penalty + offset += 4; + + // Set hotwords buffer and size + M.setValue(configPtr + offset, allocatedStrings.hotwordsBuffer.ptr, 'i8*'); + offset += 4; + M.setValue(configPtr + offset, 0, 'i32'); // hotwords_buf_size + offset += 4; + + // Save the allocated strings for freeing later + M.SherpaOnnxAllocatedStrings = allocatedStrings; + + return configPtr; + } catch (error) { + console.error('Error initializing ASR config:', error); + return 0; + } + }, + + /** + * Free the configuration memory + * @param {number} configPtr - Pointer to the configuration + * @param {Object} Module - WebAssembly module + * @private + */ + _freeConfig: function(configPtr, Module) { + if (!configPtr) return; + + try { + // Free all allocated strings + if (Module.SherpaOnnxAllocatedStrings) { + for (const key in Module.SherpaOnnxAllocatedStrings) { + if (Module.SherpaOnnxAllocatedStrings[key].ptr) { + Module._free(Module.SherpaOnnxAllocatedStrings[key].ptr); + } + } + delete Module.SherpaOnnxAllocatedStrings; + } + + // Free the config structure itself + Module._free(configPtr); + } catch (error) { + console.error('Error freeing ASR config:', error); + } + }, + + /** + * Create an online ASR recognizer with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {Promise} - A promise resolving to an instance of OnlineRecognizer + */ + createOnlineRecognizer: async function(loadedModel, options = {}) { + // Wait for WebAssembly module to be ready + await SherpaOnnx.ASR.ready; + + const config = { + featConfig: { + sampleRate: options.sampleRate || 16000, + featureDim: options.featureDim || 80, + }, + modelConfig: { + tokens: loadedModel.actualPaths.tokens || `${loadedModel.modelDir}/tokens.txt`, + numThreads: options.numThreads || 1, + provider: 'cpu', // Force to cpu to avoid issues with quantized ONNX in WebAssembly + debug: options.debug !== undefined ? options.debug : 1, // Configurable debug + }, + decodingMethod: options.decodingMethod || 'greedy_search', + enableEndpoint: options.enableEndpoint === undefined ? 
1 : options.enableEndpoint, + maxActivePaths: options.maxActivePaths || 4, + rule1MinTrailingSilence: options.rule1MinTrailingSilence || 2.4, + rule2MinTrailingSilence: options.rule2MinTrailingSilence || 1.2, + rule3MinUtteranceLength: options.rule3MinUtteranceLength || 300.0, + }; + + if (loadedModel.type === 'transducer') { + config.modelConfig.transducer = { + encoder: loadedModel.actualPaths.encoder || `${loadedModel.modelDir}/encoder.onnx`, + decoder: loadedModel.actualPaths.decoder || `${loadedModel.modelDir}/decoder.onnx`, + joiner: loadedModel.actualPaths.joiner || `${loadedModel.modelDir}/joiner.onnx`, + }; + } else if (loadedModel.type === 'paraformer') { + config.modelConfig.paraformer = { + encoder: loadedModel.actualPaths.encoder || `${loadedModel.modelDir}/encoder.onnx`, + decoder: loadedModel.actualPaths.decoder || `${loadedModel.modelDir}/decoder.onnx`, + }; + } else if (loadedModel.type === 'ctc') { + config.modelConfig.zipformer2Ctc = { + model: loadedModel.actualPaths.model || `${loadedModel.modelDir}/model.onnx`, + }; + } + + // Add readiness check using Module.calledRun + if (!window.Module || !window.Module.calledRun) { + console.error('CRITICAL: Emscripten runtime not initialized (Module.calledRun is not true) when creating recognizer.'); + throw new Error('WASM runtime not ready'); + } + console.log('Module.calledRun is true. Proceeding with recognizer creation.'); + console.log('Inspecting window.Module inside createOnlineRecognizer:', window.Module); + + const recognizer = new global.OnlineRecognizer(config, window.Module); // Use window.Module explicitly + + // Add detailed logging to inspect the recognizer object + console.log('Recognizer object created:', recognizer); + console.log('Checking if createStream method exists on recognizer:', typeof recognizer.createStream === 'function'); + if (typeof recognizer.createStream !== 'function') { + console.error('createStream method not found on recognizer. Attempting fallback instantiation.'); + // Fallback: Manually attach methods if they are missing due to instantiation issues + recognizer.createStream = function() { + console.log('Using fallback createStream method.'); + const streamHandle = window.Module.ccall( + 'SherpaOnnxCreateOnlineStream', + 'number', + ['number'], + [this.handle] + ); + const stream = new global.OnlineStream(streamHandle, this.Module); + // Track the stream for cleanup + this.streams.push(stream); + return stream; + }; + console.log('Fallback createStream method attached to recognizer.'); + } + + // Track the resource for cleanup if tracking function is available + if (SherpaOnnx.trackResource) { + SherpaOnnx.trackResource('asr', recognizer); + } + + return recognizer; + }, + + /** + * Create an offline ASR recognizer with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {OfflineRecognizer} - An instance of OfflineRecognizer + */ + createOfflineRecognizer: function(loadedModel, options = {}) { + const config = { + featConfig: { + sampleRate: options.sampleRate || 16000, + featureDim: options.featureDim || 80, + }, + modelConfig: { + tokens: loadedModel.actualPaths.tokens || `${loadedModel.modelDir}/tokens.txt`, + numThreads: options.numThreads || 1, + provider: options.provider || 'cpu', + debug: options.debug !== undefined ? 
options.debug : 1, // Configurable debug + }, + lmConfig: { + model: '', // No language model by default + scale: 1.0, + }, + decodingMethod: options.decodingMethod || 'greedy_search', + maxActivePaths: options.maxActivePaths || 4, + }; + + if (loadedModel.type === 'transducer') { + config.modelConfig.transducer = { + encoder: loadedModel.actualPaths.encoder || `${loadedModel.modelDir}/encoder.onnx`, + decoder: loadedModel.actualPaths.decoder || `${loadedModel.modelDir}/decoder.onnx`, + joiner: loadedModel.actualPaths.joiner || `${loadedModel.modelDir}/joiner.onnx`, + }; + } else if (loadedModel.type === 'paraformer') { + config.modelConfig.paraformer = { + model: loadedModel.actualPaths.model || `${loadedModel.modelDir}/model.onnx`, + }; + } else if (loadedModel.type === 'ctc') { + config.modelConfig.nemoCtc = { + model: loadedModel.actualPaths.model || `${loadedModel.modelDir}/model.onnx`, + }; + } + + const recognizer = new global.OfflineRecognizer(config, global.Module); + + // Track the resource for cleanup if tracking function is available + if (SherpaOnnx.trackResource) { + SherpaOnnx.trackResource('asr', recognizer); + } + + return recognizer; + }, + + /** + * Initialize offline recognizer configuration in WASM + * @param {Object} config - ASR configuration + * @param {Object} Module - WebAssembly module + * @returns {number} - Pointer to the configuration in WASM + * @private + */ + _initOfflineRecognizerConfig: function(config, Module) { + if (!config) { + console.error('ASR config is null'); + return 0; + } + + try { + // First, allocate all the strings we need + const allocatedStrings = {}; + + // Transducer model config + if (config.modelConfig.transducer) { + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.encoder, Module); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.decoder, Module); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.joiner, Module); + } else { + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString('', Module); + } + + // Paraformer model config + if (config.modelConfig.paraformer) { + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.encoder, Module); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.decoder, Module); + } else { + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString('', Module); + } + + // Zipformer2 CTC model config + if (config.modelConfig.zipformer2Ctc) { + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString(config.modelConfig.zipformer2Ctc.model, Module); + } else { + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString('', Module); + } + + // Tokens, provider, model_type, modeling_unit, bpe_vocab + allocatedStrings.tokens = SherpaOnnx.Utils.allocateString(config.modelConfig.tokens, Module); + allocatedStrings.provider = SherpaOnnx.Utils.allocateString(config.modelConfig.provider || 'cpu', Module); + allocatedStrings.modelType = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.modelingUnit = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.bpeVocab = SherpaOnnx.Utils.allocateString('', 
Module); // Not used in JS API + + // Token buffer is not used in JS API + allocatedStrings.tokensBuffer = SherpaOnnx.Utils.allocateString('', Module); + + // Decoding method + allocatedStrings.decodingMethod = SherpaOnnx.Utils.allocateString(config.decodingMethod || 'greedy_search', Module); + + // Hotwords + allocatedStrings.hotwordsFile = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.hotwordsBuffer = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + + // Rule FSTs and FARs + allocatedStrings.ruleFsts = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.ruleFars = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + + // Now allocate the main config structure + // Size needs to match the C structure size + const configSize = 200; // Adjust if needed to match C struct + const configPtr = Module._malloc(configSize); + + // Set feat_config fields (Starts populating the allocated memory) + let offset = 0; + Module.setValue(configPtr + offset, config.featConfig.sampleRate || 16000, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.featConfig.featureDim || 80, 'i32'); + offset += 4; + + // Set model_config fields - transducer + Module.setValue(configPtr + offset, allocatedStrings.encoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.decoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.joiner.ptr, 'i8*'); + offset += 4; + + // Set model_config fields - paraformer + Module.setValue(configPtr + offset, allocatedStrings.paraEncoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.paraDecoder.ptr, 'i8*'); + offset += 4; + + // Set model_config fields - zipformer2_ctc + Module.setValue(configPtr + offset, allocatedStrings.zipformerModel.ptr, 'i8*'); + offset += 4; + + // Set remaining model_config fields + Module.setValue(configPtr + offset, allocatedStrings.tokens.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.modelConfig.numThreads || 1, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.provider.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.modelConfig.debug || 0, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.modelType.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.modelingUnit.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.bpeVocab.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.tokensBuffer.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // tokens_buf_size + offset += 4; + + // Set recognizer config fields + Module.setValue(configPtr + offset, allocatedStrings.decodingMethod.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.maxActivePaths || 4, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.enableEndpoint || 1, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.rule1MinTrailingSilence || 2.4, 'float'); + offset += 4; + Module.setValue(configPtr + offset, config.rule2MinTrailingSilence || 1.2, 'float'); + offset += 4; + Module.setValue(configPtr + offset, config.rule3MinUtteranceLength || 300, 'float'); + offset += 4; + + // Set hotwords fields + Module.setValue(configPtr + offset, allocatedStrings.hotwordsFile.ptr, 'i8*'); + offset += 4; + 
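+ // As in _initOnlineRecognizerConfig, the remaining fields (hotwords score,
+ // CTC FST decoder, rule FSTs/FARs, blank penalty, hotwords buffer) are zeroed,
+ // and the offsets assume 4-byte wasm32 pointers.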
Module.setValue(configPtr + offset, 0.0, 'float'); // hotwords_score + offset += 4; + + // Set CTC FST decoder config - graph and max_active + Module.setValue(configPtr + offset, 0, 'i8*'); // graph + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // max_active + offset += 4; + + // Set rule FSTs and FARs + Module.setValue(configPtr + offset, allocatedStrings.ruleFsts.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.ruleFars.ptr, 'i8*'); + offset += 4; + + // Set blank penalty + Module.setValue(configPtr + offset, 0.0, 'float'); // blank_penalty + offset += 4; + + // Set hotwords buffer and size + Module.setValue(configPtr + offset, allocatedStrings.hotwordsBuffer.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // hotwords_buf_size + offset += 4; + + // Save the allocated strings for freeing later + Module.SherpaOnnxAllocatedStrings = allocatedStrings; + + return configPtr; + } catch (error) { + console.error('Error initializing ASR config:', error); + return 0; + } + } + }; + + /** + * OnlineRecognizer class for streaming speech recognition + */ + global.OnlineRecognizer = global.OnlineRecognizer || function(config, Module) { + this.Module = window.Module; // Explicitly use window.Module + this.config = config; + this.streams = []; // Track streams created by this recognizer + + // Initialize the configuration in WASM, explicitly passing window.Module + const configPtr = SherpaOnnx.ASR._initOnlineRecognizerConfig(config, window.Module); + + if (!configPtr) { + throw new Error("Failed to initialize ASR config pointer."); + } + + // Create the recognizer using window.Module + this.handle = window.Module.ccall( + 'SherpaOnnxCreateOnlineRecognizer', + 'number', + ['number'], + [configPtr] + ); + + // Free the configuration memory + SherpaOnnx.ASR._freeConfig(configPtr, Module); + + /** + * Create a stream for audio input + * @returns {OnlineStream} - A new stream for audio input + */ + this.createStream = function() { + const streamHandle = this.Module.ccall( + 'SherpaOnnxCreateOnlineStream', + 'number', + ['number'], + [this.handle] + ); + const stream = new global.OnlineStream(streamHandle, this.Module); + + // Track the stream for cleanup + this.streams.push(stream); + + return stream; + }; + + /** + * Check if the stream is ready for decoding + * @param {OnlineStream} stream - The stream to check + * @returns {boolean} - True if ready, false otherwise + */ + this.isReady = function(stream) { + return this.Module.ccall( + 'SherpaOnnxIsOnlineStreamReady', + 'number', + ['number', 'number'], + [this.handle, stream.handle] + ) === 1; + }; + + /** + * Decode the audio in the stream + * @param {OnlineStream} stream - The stream to decode + */ + this.decode = function(stream) { + this.Module.ccall( + 'SherpaOnnxDecodeOnlineStream', + 'void', + ['number', 'number'], + [this.handle, stream.handle] + ); + }; + + /** + * Check if an endpoint has been detected + * @param {OnlineStream} stream - The stream to check + * @returns {boolean} - True if endpoint detected, false otherwise + */ + this.isEndpoint = function(stream) { + return this.Module.ccall( + 'SherpaOnnxOnlineStreamIsEndpoint', + 'number', + ['number', 'number'], + [this.handle, stream.handle] + ) === 1; + }; + + /** + * Reset the stream + * @param {OnlineStream} stream - The stream to reset + */ + this.reset = function(stream) { + this.Module.ccall( + 'SherpaOnnxOnlineStreamReset', + 'void', + ['number', 'number'], + [this.handle, stream.handle] + ); + }; + + 
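+ // Typical decode loop (a hypothetical usage sketch; `samples` is a Float32Array
+ // in [-1, 1] supplied by the caller, e.g. from an AudioWorklet or file decoder):
+ //   stream.acceptWaveform(16000, samples);
+ //   while (recognizer.isReady(stream)) recognizer.decode(stream);
+ //   const result = recognizer.getResult(stream);  // parsed JSON, e.g. result.text
+ //   if (recognizer.isEndpoint(stream)) recognizer.reset(stream);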
/** + * Get the recognition result + * @param {OnlineStream} stream - The stream to get results from + * @returns {Object} - Recognition result as JSON + */ + this.getResult = function(stream) { + const resultPtr = this.Module.ccall( + 'SherpaOnnxGetOnlineStreamResultAsJson', + 'number', + ['number', 'number'], + [this.handle, stream.handle] + ); + + const jsonStr = this.Module.UTF8ToString(resultPtr); + const result = JSON.parse(jsonStr); + + this.Module.ccall( + 'SherpaOnnxDestroyOnlineStreamResultJson', + 'null', + ['number'], + [resultPtr] + ); + + return result; + }; + + /** + * Free the recognizer and all associated streams + */ + this.free = function() { + // Free all streams first + for (let i = this.streams.length - 1; i >= 0; i--) { + if (this.streams[i]) { + this.streams[i].free(); + } + this.streams.splice(i, 1); + } + + // Then free the recognizer + if (this.handle) { + this.Module.ccall( + 'SherpaOnnxDestroyOnlineRecognizer', + 'null', + ['number'], + [this.handle] + ); + this.handle = null; + } + }; + }; + + /** + * OnlineStream class for handling streaming audio input + */ + global.OnlineStream = global.OnlineStream || function(handle, Module) { + this.handle = handle; + this.Module = Module || window.Module; // Use passed Module or fallback to window.Module + if (!this.Module || !this.Module.HEAPF32) { + console.warn('WebAssembly module not fully initialized in OnlineStream constructor. Will retry on method calls.'); + } + this.pointer = null; // buffer + this.n = 0; // buffer size + + /** + * Accept audio waveform data + * @param {number} sampleRate - Sample rate of the audio + * @param {Float32Array} samples - Audio samples in [-1, 1] range + */ + this.acceptWaveform = function(sampleRate, samples) { + if (!this.Module || !this.Module.HEAPF32) { + console.warn('WebAssembly module or HEAPF32 not available. Attempting to find initialized module.'); + this.Module = window.Module || global.Module; + if (!this.Module || !this.Module.HEAPF32) { + console.error('HEAPF32 still not available. Attempting to initialize memory view.'); + // Attempt to access or initialize HEAPF32 dynamically + if (this.Module && this.Module.HEAP8) { + try { + this.Module.HEAPF32 = new Float32Array(this.Module.HEAP8.buffer); + console.log('Successfully initialized HEAPF32 dynamically from HEAP8.'); + } catch (e) { + console.error('Failed to initialize HEAPF32 dynamically from HEAP8:', e); + // Last resort: Try to access WebAssembly memory directly + if (this.Module && this.Module.asm && this.Module.asm.memory) { + try { + this.Module.HEAPF32 = new Float32Array(this.Module.asm.memory.buffer); + console.log('Successfully initialized HEAPF32 directly from WebAssembly memory.'); + } catch (e2) { + console.error('Failed to initialize HEAPF32 directly from WebAssembly memory:', e2); + throw new Error('WebAssembly module or HEAPF32 not available after all retries. Ensure the WASM module is fully initialized.'); + } + } else { + throw new Error('WebAssembly module or HEAPF32 not available after retry. Ensure the WASM module is fully initialized.'); + } + } + } else { + throw new Error('WebAssembly module or HEAPF32 not available after retry. 
Ensure the WASM module is fully initialized.'); + } + } + } + if (this.n < samples.length) { + if (this.pointer) { + this.Module._free(this.pointer); + } + this.pointer = this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.n = samples.length; + } + + this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); + this.Module.ccall( + 'SherpaOnnxOnlineStreamAcceptWaveform', + 'void', + ['number', 'number', 'number', 'number'], + [this.handle, sampleRate, this.pointer, samples.length] + ); + }; + + /** + * Signal that input is finished + */ + this.inputFinished = function() { + this.Module.ccall( + 'SherpaOnnxOnlineStreamInputFinished', + 'void', + ['number'], + [this.handle] + ); + }; + + /** + * Free the stream + */ + this.free = function() { + if (this.handle) { + this.Module.ccall( + 'SherpaOnnxDestroyOnlineStream', + 'null', + ['number'], + [this.handle] + ); + this.handle = null; + + if (this.pointer) { + this.Module._free(this.pointer); + this.pointer = null; + this.n = 0; + } + } + }; + }; + + /** + * OfflineRecognizer class for non-streaming speech recognition + */ + global.OfflineRecognizer = global.OfflineRecognizer || function(config, Module) { + this.Module = Module; + this.config = config; + this.streams = []; // Track streams created by this recognizer + + // Initialize the configuration in WASM + const configPtr = SherpaOnnx.ASR._initOfflineRecognizerConfig(config, Module); + + // Create the recognizer + this.handle = Module.ccall( + 'SherpaOnnxCreateOfflineRecognizer', + 'number', + ['number'], + [configPtr] + ); + + // Free the configuration memory + SherpaOnnx.ASR._freeConfig(configPtr, Module); + + /** + * Create a stream for offline processing + * @returns {OfflineStream} - A new stream for offline processing + */ + this.createStream = function() { + const streamHandle = this.Module.ccall( + 'SherpaOnnxCreateOfflineStream', + 'number', + ['number'], + [this.handle] + ); + const stream = new global.OfflineStream(streamHandle, this.Module); + + // Track the stream for cleanup + this.streams.push(stream); + + return stream; + }; + + /** + * Decode the audio in the stream + * @param {OfflineStream} stream - The stream to decode + */ + this.decode = function(stream) { + this.Module.ccall( + 'SherpaOnnxDecodeOfflineStream', + 'void', + ['number', 'number'], + [this.handle, stream.handle] + ); + }; + + /** + * Free the recognizer and all associated streams + */ + this.free = function() { + // Free all streams first + for (let i = this.streams.length - 1; i >= 0; i--) { + if (this.streams[i]) { + this.streams[i].free(); + } + this.streams.splice(i, 1); + } + + // Then free the recognizer + if (this.handle) { + this.Module.ccall( + 'SherpaOnnxDestroyOfflineRecognizer', + 'null', + ['number'], + [this.handle] + ); + this.handle = null; + } + }; + }; + + /** + * OfflineStream class for handling non-streaming audio input + */ + global.OfflineStream = global.OfflineStream || function(handle, Module) { + this.handle = handle; + this.Module = Module; + + /** + * Accept audio waveform data + * @param {number} sampleRate - Sample rate of the audio + * @param {Float32Array} samples - Audio samples in [-1, 1] range + */ + this.acceptWaveform = function(sampleRate, samples) { + const pointer = this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT); + + this.Module.ccall( + 'SherpaOnnxAcceptWaveformOffline', + 'void', + ['number', 'number', 'number', 'number'], + [this.handle, 
sampleRate, pointer, samples.length] + ); + + this.Module._free(pointer); + }; + + /** + * Get the recognition result + * @returns {Object} - Recognition result as JSON + */ + this.getResult = function() { + const resultPtr = this.Module.ccall( + 'SherpaOnnxGetOfflineStreamResultAsJson', + 'number', + ['number'], + [this.handle] + ); + + const jsonStr = this.Module.UTF8ToString(resultPtr); + const result = JSON.parse(jsonStr); + + this.Module.ccall( + 'SherpaOnnxDestroyOfflineStreamResultJson', + 'null', + ['number'], + [resultPtr] + ); + + return result; + }; + + /** + * Free the stream + */ + this.free = function() { + if (this.handle) { + this.Module.ccall( + 'SherpaOnnxDestroyOfflineStream', + 'null', + ['number'], + [this.handle] + ); + this.handle = null; + } + }; + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-combined-core.js b/wasm/combined/sherpa-onnx-combined-core.js new file mode 100644 index 0000000000..0e35a7e7fc --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined-core.js @@ -0,0 +1,649 @@ +/** + * sherpa-onnx-core.js + * + * Core functionality for the SherpaOnnx WASM modules + */ + +(function(global) { + // Create the SherpaOnnx namespace if it doesn't exist + global.SherpaOnnx = global.SherpaOnnx || {}; + + // Create main namespace + const SherpaOnnx = {}; + + // Check if Module already exists and extend it + if (typeof window.Module !== 'undefined') { + console.log('Module already defined at script load time. Checking initialization status...'); + console.log('Module properties at load:', Object.keys(window.Module).slice(0, 10), '... 
(first 10 shown)'); + console.log('Module.onRuntimeInitialized exists:', !!window.Module.onRuntimeInitialized); + console.log('Module.calledRun status at load:', !!window.Module.calledRun); + // Immediate attempt to initialize HEAPF32 at load time + if (!window.Module.HEAPF32) { + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Successfully initialized HEAPF32 dynamically from HEAP8 at load time in core module.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Successfully initialized HEAPF32 directly from WebAssembly memory at load time in core module.'); + } else if (window.Module.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.memory.buffer); + console.log('Successfully initialized HEAPF32 from Module.memory at load time in core module.'); + } else if (window.Module._memory) { + window.Module.HEAPF32 = new Float32Array(window.Module._memory.buffer); + console.log('Successfully initialized HEAPF32 from Module._memory at load time in core module.'); + } else if (typeof WebAssembly !== 'undefined' && WebAssembly.Memory && window.Module.asm) { + for (const prop in window.Module.asm) { + if (window.Module.asm[prop] instanceof WebAssembly.Memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm[prop].buffer); + console.log(`Successfully initialized HEAPF32 from WebAssembly.Memory found in asm.${prop} at load time in core module.`); + break; + } + } + if (!window.Module.HEAPF32) { + console.warn('No WebAssembly.Memory found in asm properties at load time in core module.'); + } + } else { + console.warn('No standard method found to initialize HEAPF32 at load time in core module.'); + // Simplified deeper inspection of window.Module for any memory buffer + console.log('Inspecting window.Module for potential memory buffers...'); + let foundBuffer = false; + for (const prop in window.Module) { + try { + if (window.Module[prop] && typeof window.Module[prop] === 'object') { + if (window.Module[prop] instanceof ArrayBuffer) { + window.Module.HEAPF32 = new Float32Array(window.Module[prop]); + console.log(`Initialized HEAPF32 from ArrayBuffer in Module.${prop} at load time.`); + foundBuffer = true; + break; + } else if (window.Module[prop].buffer && window.Module[prop].buffer instanceof ArrayBuffer) { + window.Module.HEAPF32 = new Float32Array(window.Module[prop].buffer); + console.log(`Initialized HEAPF32 from buffer in Module.${prop}.buffer at load time.`); + foundBuffer = true; + break; + } + } + } catch (e) { + console.error(`Error inspecting Module.${prop} at load time:`, e.message); + } + } + if (!foundBuffer) { + console.log('No suitable memory buffer found in deep inspection at load time.'); + } + } + } catch (e) { + console.error('Failed to initialize HEAPF32 dynamically at load time in core module:', e.message); + } + console.log(`Post-workaround at load time - HEAPF32 exists: ${!!window.Module.HEAPF32}`); + } + const originalOnRuntimeInitialized = window.Module.onRuntimeInitialized; + window.Module.onRuntimeInitialized = function() { + console.log('onRuntimeInitialized triggered. 
SherpaOnnx Core module initialized.'); + console.log('Module.calledRun status when onRuntimeInitialized triggered:', !!window.Module.calledRun); + console.log('Checking for HEAPF32 availability after initialization:', !!window.Module.HEAPF32); + global.SherpaOnnx.isReady = true; // Custom readiness flag + console.log('SherpaOnnx readiness flag set to true'); + if (originalOnRuntimeInitialized) { + console.log('Calling original onRuntimeInitialized callback.'); + originalOnRuntimeInitialized(); + } + if (global.onModuleReady) { + console.log('Calling global.onModuleReady callback.'); + global.onModuleReady(); + } + }; + console.log('onRuntimeInitialized hook set. Waiting for initialization...'); + // Additional check if calledRun is already true but onRuntimeInitialized hasn't fired + if (window.Module.calledRun && !global.SherpaOnnx.isReady) { + console.warn('Module.calledRun is true but onRuntimeInitialized has not fired. Forcing readiness check.'); + // Start a continuous check for HEAPF32 availability + let heapCheckAttempts = 0; + const heapCheckInterval = setInterval(() => { + heapCheckAttempts++; + console.log(`HEAPF32 check attempt ${heapCheckAttempts}: HEAPF32 exists: ${!!window.Module.HEAPF32}`); + if (!window.Module.HEAPF32) { + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Initialized HEAPF32 from HEAP8 during continuous check.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Initialized HEAPF32 from WebAssembly memory during continuous check.'); + } else if (window.Module.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.memory.buffer); + console.log('Initialized HEAPF32 from Module.memory during continuous check.'); + } else if (window.Module._memory) { + window.Module.HEAPF32 = new Float32Array(window.Module._memory.buffer); + console.log('Initialized HEAPF32 from Module._memory during continuous check.'); + } else if (typeof WebAssembly !== 'undefined' && WebAssembly.Memory && window.Module.asm) { + for (const prop in window.Module.asm) { + if (window.Module.asm[prop] instanceof WebAssembly.Memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm[prop].buffer); + console.log(`Initialized HEAPF32 from WebAssembly.Memory in asm.${prop} during continuous check.`); + break; + } + } + if (!window.Module.HEAPF32) { + console.warn('No WebAssembly.Memory found in asm properties during continuous check.'); + } + } else { + console.warn('No standard method found to initialize HEAPF32 during continuous check.'); + // Simplified deeper inspection during continuous check + console.log(`Check ${heapCheckAttempts}: Inspecting window.Module for memory buffers...`); + let foundBuffer = false; + for (const prop in window.Module) { + try { + if (window.Module[prop] && typeof window.Module[prop] === 'object') { + if (window.Module[prop] instanceof ArrayBuffer) { + window.Module.HEAPF32 = new Float32Array(window.Module[prop]); + console.log(`Initialized HEAPF32 from ArrayBuffer in Module.${prop} during check ${heapCheckAttempts}.`); + foundBuffer = true; + break; + } else if (window.Module[prop].buffer && window.Module[prop].buffer instanceof ArrayBuffer) { + window.Module.HEAPF32 = new Float32Array(window.Module[prop].buffer); + console.log(`Initialized HEAPF32 from buffer in Module.${prop}.buffer during check ${heapCheckAttempts}.`); + foundBuffer = true; + break; + } + } + } catch 
(e) { + console.error(`Error inspecting Module.${prop} during check ${heapCheckAttempts}:`, e.message); + } + } + if (!foundBuffer) { + console.log(`Check ${heapCheckAttempts}: No suitable memory buffer found in deep inspection.`); + } + } + } catch (e) { + console.error('Failed to initialize HEAPF32 during continuous check:', e.message); + } + } + if (window.Module.HEAPF32 || heapCheckAttempts >= 10) { + clearInterval(heapCheckInterval); + console.log(`Stopping HEAPF32 checks after ${heapCheckAttempts} attempts. Final status - HEAPF32 exists: ${!!window.Module.HEAPF32}`); + if (!window.Module.HEAPF32) { + console.error('HEAPF32 initialization failed after maximum attempts. Proceeding anyway to unblock UI.'); + } + if (!global.SherpaOnnx.isReady) { + global.SherpaOnnx.isReady = true; + console.log('SherpaOnnx readiness flag manually set to true after HEAPF32 check.'); + if (global.onModuleReady) { + console.log('Calling global.onModuleReady callback after HEAPF32 check.'); + global.onModuleReady(); + } + } + } + }, 500); // Check every 500ms up to 10 attempts (5 seconds) + setTimeout(() => { + if (!global.SherpaOnnx.isReady) { + console.error('onRuntimeInitialized still not triggered after extended delay. Manually setting readiness flag.'); + global.SherpaOnnx.isReady = true; + console.log('SherpaOnnx readiness flag manually set to true due to timeout.'); + if (!window.Module.HEAPF32) { + console.log('HEAPF32 still not available after timeout. Final attempt to initialize.'); + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Initialized HEAPF32 from HEAP8 during final timeout check.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Initialized HEAPF32 from WebAssembly memory during final timeout check.'); + } + } catch (e) { + console.error('Final attempt to initialize HEAPF32 failed:', e.message); + } + console.log(`Final status after timeout - HEAPF32 exists: ${!!window.Module.HEAPF32}`); + } + if (global.onModuleReady) { + console.log('Calling global.onModuleReady callback due to forced readiness.'); + global.onModuleReady(); + } + } + }, 10000); // Wait 10 seconds before forcing readiness + } + } else { + console.log('Module not defined at script load time. Setting up property trap...'); + console.log("Waiting for Module to be defined..."); + Object.defineProperty(global, 'Module', { + set: function(mod) { + console.log('Module being set. Capturing initialization...'); + console.log('Module properties at set:', Object.keys(mod).slice(0, 10), '... (first 10 shown)'); + this._Module = mod; + console.log("Module defined, waiting for runtime initialization"); + const originalOnRuntimeInitialized = mod.onRuntimeInitialized; + mod.onRuntimeInitialized = function() { + console.log("onRuntimeInitialized triggered from setter. SherpaOnnx Core module initialized."); + global.SherpaOnnx.isReady = true; // Custom readiness flag + console.log("SherpaOnnx readiness flag set to true from setter"); + if (originalOnRuntimeInitialized) { + console.log('Calling original onRuntimeInitialized callback from setter.'); + originalOnRuntimeInitialized(); + } + if (global.onModuleReady) { + console.log('Calling global.onModuleReady callback from setter.'); + global.onModuleReady(); + } + }; + console.log('onRuntimeInitialized hook set in setter. 
Waiting for initialization...'); + }, + get: function() { + return this._Module; + } + }); + } + + // Configuration for SherpaOnnx + SherpaOnnx.Config = { + // Paths to preloaded assets + assetPaths: { + vad: '/sherpa_assets/vad', + tts: '/sherpa_assets/tts', + asr: '/sherpa_assets/asr', + kws: '/sherpa_assets/kws', + speakers: '/sherpa_assets/speakers', + enhancement: '/sherpa_assets/enhancement' + }, + + // Allow users to override the location of the data file + setDataFileLocation: function(location) { + if (global.Module) { + const originalLocateFile = global.Module.locateFile; + global.Module.locateFile = function(path) { + if (path.endsWith('.data')) { + return location; + } + return typeof originalLocateFile === 'function' + ? originalLocateFile(path) + : path; + }; + } + } + }; + + // Common utilities for memory management and shared functionality + SherpaOnnx.Utils = { + /** + * Free configuration memory allocated in WASM + * @param {Object} config - Configuration object with allocated memory + * @param {Object} Module - WebAssembly module + */ + freeConfig: function(config, Module) { + if (!config) return; + + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('sileroVad' in config) { + this.freeConfig(config.sileroVad, Module); + } + + if (config.ptr) { + Module._free(config.ptr); + } + }, + + /** + * Copy string to WASM heap and return pointer + * @param {string} str - String to allocate + * @param {Object} Module - WebAssembly module + * @returns {Object} Object with pointer and length + */ + allocateString: function(str, Module) { + if (!str) str = ''; + const strLen = Module.lengthBytesUTF8(str) + 1; + const strPtr = Module._malloc(strLen); + Module.stringToUTF8(str, strPtr, strLen); + return { ptr: strPtr, len: strLen }; + } + }; + + // File system utilities for model loading + SherpaOnnx.FileSystem = { + /** + * Check if a file exists in the filesystem + * @param {string} path - Path to check + * @returns {boolean} - Whether the file exists + */ + fileExists: function(path) { + try { + global.Module.FS.lookupPath(path); + return true; + } catch (e) { + return false; + } + }, + + /** + * Get a valid asset path for a given module type and filename + * @param {string} moduleType - Type of module (vad, tts, kws, asr) + * @param {string} filename - Name of the file to look for + * @returns {string} - The first valid path where the asset exists + */ + getAssetPath: function(moduleType, filename) { + // Check in the preloaded assets directory structure + const paths = [ + `/assets/${moduleType}/${filename}`, + `/assets/${moduleType}/models/${filename}`, + `/preloaded/${moduleType}/${filename}` + ]; + + // Return first path that exists + for (const path of paths) { + if (this.fileExists(path)) { + return path; + } + } + + // Default fallback + return `/assets/${moduleType}/${filename}`; + }, + + /** + * List files in a directory in the filesystem + * @param {string} dirPath - Directory path + * @returns {Array} List of files + */ + listFiles: function(dirPath) { + try { + if (!global.Module || !global.Module.FS) return []; + return global.Module.FS.readdir(dirPath).filter( + name => name !== '.' && name !== '..' 
+ ); + } catch (e) { + console.warn(`Error listing files in ${dirPath}: ${e.message}`); + return []; + } + }, + + /** + * Safely load a file with error handling and fallback options + * @param {string} path - Path to load + * @param {string} moduleType - Type of module (vad, tts, kws, asr) for alternative paths + * @param {object} options - Options for loading + * @param {boolean} [options.tryAlternativePaths=true] - Whether to try alternative paths if first load fails + * @param {any} [options.defaultValue=null] - Default value to return if loading fails + * @returns {object} - Result object with success flag, data, and error message + */ + safeLoadFile: function(path, moduleType, options = {}) { + const { tryAlternativePaths = true, defaultValue = null } = options; + let result = { + success: false, + data: defaultValue, + error: null + }; + + try { + // First try loading from the original path + if (this.fileExists(path)) { + const data = global.Module.FS.readFile(path); + result.success = true; + result.data = data; + console.log(`Successfully loaded file from: ${path}`); + return result; + } + + // If the file doesn't exist at the original path and we should try alternatives + if (tryAlternativePaths && moduleType) { + // Extract filename from path + const filename = path.split('/').pop(); + const alternativePath = this.getAssetPath(moduleType, filename); + + if (this.fileExists(alternativePath) && alternativePath !== path) { + const data = global.Module.FS.readFile(alternativePath); + result.success = true; + result.data = data; + console.log(`Loaded file from alternative path: ${alternativePath}`); + return result; + } + } + + // If we get here, we couldn't find the file anywhere + result.error = `File not found at path: ${path} or any alternative locations`; + console.warn(result.error); + return result; + } catch (error) { + result.error = `Error loading file: ${error.message || error}`; + console.error(result.error); + return result; + } + }, + + /** + * Safely load a file from a URL into the WASM file system + * @param {string} url - URL to fetch the file from + * @param {string} localPath - Path where to save the file in WASM filesystem + * @param {boolean} debug - Whether to output debug logs + * @returns {Promise} - Info about the loaded file + */ + loadFile: async function(url, localPath, debug = false) { + try { + if (debug) console.log(`Loading file from ${url} to ${localPath}`); + + // Create parent directory if needed + const lastSlash = localPath.lastIndexOf('/'); + if (lastSlash > 0) { + const dirPath = localPath.substring(0, lastSlash); + this.ensureDirectory(dirPath); + } + + // Fetch the file + if (debug) console.log(`Fetching ${url}`); + const response = await fetch(url); + + if (!response.ok) { + throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`); + } + + const buffer = await response.arrayBuffer(); + + if (!buffer || buffer.byteLength === 0) { + throw new Error(`Empty response from ${url}`); + } + + if (debug) console.log(`Downloaded ${url}, size: ${buffer.byteLength} bytes`); + + // Write the file + global.Module.FS.writeFile(localPath, new Uint8Array(buffer)); + + return { + success: true, + path: localPath + }; + } catch (error) { + console.error(`Error loading ${url}:`, error); + return { + success: false, + error: error.message + }; + } + }, + + /** + * Create directory and parents if needed + * @param {string} dirPath - Directory path + */ + ensureDirectory: function(dirPath) { + if (!dirPath) return; + + // Skip if it's 
the root directory + if (dirPath === '/') return; + + try { + // Check if directory exists + const stat = global.Module.FS.stat(dirPath); + if (stat.isDir) return; // Already exists + throw new Error(`Path exists but is not a directory: ${dirPath}`); + } catch (error) { + // If error is that the path doesn't exist, create it + if (error.errno === 44 || error.errno === 2 || error.message.includes('No such file or directory')) { + // Ensure parent directory exists first + const parentDir = dirPath.substring(0, dirPath.lastIndexOf('/')); + if (parentDir) this.ensureDirectory(parentDir); + + // Create this directory + global.Module.FS.mkdir(dirPath); + return; + } + + // For other errors, rethrow + throw error; + } + }, + + /** + * Extract a zip file to the WASM filesystem + * @param {ArrayBuffer} zipData - The zip file data + * @param {string} targetPath - Target extraction path + * @param {boolean} debug - Enable debug logging + * @returns {Promise} - Result of extraction + */ + extractZip: async function(zipData, targetPath, debug = false) { + if (debug) console.log(`Extracting zip to ${targetPath}`); + + try { + // Make sure the base directory exists + this.ensureDirectory(targetPath); + + // Load JSZip from CDN if needed + if (typeof JSZip === 'undefined') { + if (debug) console.log("Loading JSZip library from CDN"); + await new Promise((resolve, reject) => { + const script = document.createElement('script'); + script.src = 'https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js'; + script.onload = resolve; + script.onerror = reject; + document.head.appendChild(script); + }); + + if (typeof JSZip === 'undefined') { + throw new Error("Failed to load JSZip library"); + } + } + + // Process the zip file + const zip = await JSZip.loadAsync(zipData); + const extractedFiles = []; + + // First, create all directories + for (const path in zip.files) { + const file = zip.files[path]; + + if (file.dir) { + this.ensureDirectory(`${targetPath}/${path}`); + } else { + const dirPath = path.substring(0, path.lastIndexOf('/')); + if (dirPath) { + this.ensureDirectory(`${targetPath}/${dirPath}`); + } + } + } + + // Now extract all files + for (const path in zip.files) { + const file = zip.files[path]; + if (file.dir) continue; // Skip directories, already created + + try { + // Create the full path + const fullPath = `${targetPath}/${path}`; + + // Extract and write the file + const content = await file.async('arraybuffer'); + global.Module.FS.writeFile(fullPath, new Uint8Array(content)); + extractedFiles.push(fullPath); + } catch (fileErr) { + console.error(`Error extracting file ${path}: ${fileErr.message}`); + } + } + + if (debug) console.log(`Successfully extracted ${extractedFiles.length} files`); + return { success: true, files: extractedFiles }; + } catch (error) { + console.error(`Error extracting zip: ${error.message}`); + return { success: false, error: error.message }; + } + }, + + /** + * Debug the filesystem + * @param {string} [path="/"] - Path to list + */ + debugFilesystem: function(path = "/") { + try { + console.log(`--- Filesystem contents of ${path} ---`); + if (!global.Module || !global.Module.FS) { + console.log("Module.FS not available"); + return; + } + + const entries = this.listFiles(path); + console.log(entries); + + // Show preloaded asset directories + Object.values(SherpaOnnx.Config.assetPaths).forEach(assetPath => { + if (this.fileExists(assetPath)) { + console.log(`--- ${assetPath} contents ---`); + console.log(this.listFiles(assetPath)); + } + }); + } catch (err) 
{ + console.error("Error debugging filesystem:", err); + } + } + }; + + // Resource tracking for cleanup + SherpaOnnx.Resources = { + // List of active resources by type + active: { + asr: [], + vad: [], + tts: [], + kws: [], + speakers: [], + enhancement: [] + }, + + /** + * Track a resource for later cleanup + * @param {string} type - Resource type + * @param {Object} resource - Resource to track + * @returns {Object} The resource (for chaining) + */ + track: function(type, resource) { + if (this.active[type]) { + this.active[type].push(resource); + } + return resource; + }, + + /** + * Clean up resources of a specific type + * @param {string} [type] - Resource type (if omitted, clean all types) + */ + cleanup: function(type) { + if (type) { + // Clean up specific type + if (this.active[type]) { + this.active[type].forEach(resource => { + if (resource && typeof resource.free === 'function') { + resource.free(); + } + }); + this.active[type] = []; + } + } else { + // Clean up all types + Object.keys(this.active).forEach(t => this.cleanup(t)); + } + } + }; + + // For convenience, add alias methods + SherpaOnnx.trackResource = SherpaOnnx.Resources.track.bind(SherpaOnnx.Resources); + SherpaOnnx.cleanup = SherpaOnnx.Resources.cleanup.bind(SherpaOnnx.Resources); + + // Expose SherpaOnnx to the global object + global.SherpaOnnx = SherpaOnnx; +})(typeof window !== 'undefined' ? window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-combined-enhancement.js b/wasm/combined/sherpa-onnx-combined-enhancement.js new file mode 100644 index 0000000000..0b833c8548 --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined-enhancement.js @@ -0,0 +1,96 @@ +/** + * sherpa-onnx-enhancement.js + * + * Speech Enhancement functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. 
Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing SpeechEnhancement namespace + SherpaOnnx.SpeechEnhancement = SherpaOnnx.SpeechEnhancement || {}; + + // Define the SpeechEnhancement module functionality + SherpaOnnx.SpeechEnhancement = { + /** + * Load a Speech Enhancement model from URL + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const modelDir = modelConfig.modelDir || 'speech-enhancement-models'; + + try { + global.Module.FS.mkdir(modelDir, 0o777); + } catch(e) { + if (e.code !== 'EEXIST') throw e; + } + + // Load the model + await SherpaOnnx.FileSystem.loadFile(modelConfig.model || 'assets/enhancement/gtcrn.onnx', `${modelDir}/model.onnx`); + + return { + modelDir: modelDir + }; + }, + + /** + * Create a Speech Enhancement instance with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {SpeechEnhancer} - A Speech Enhancement instance + */ + createSpeechEnhancer: function(loadedModel, options = {}) { + // This is a placeholder for actual implementation + // In a real implementation, you would create the configuration + // and pass it to the WASM module + + const config = { + model: { + gtcrn: { + model: `${loadedModel.modelDir}/model.onnx` + }, + numThreads: options.numThreads || 1, + debug: options.debug !== undefined ? options.debug : 1, + provider: options.provider || 'cpu' + } + }; + + // In a real implementation, you would create and return an instance + // of a SpeechEnhancer class + + console.warn('Speech Enhancement implementation is not fully functional yet'); + + // Placeholder for the actual implementation + return { + config: config, + + // Placeholder methods that would normally interact with the WASM module + process: function(audioSamples, sampleRate) { + console.warn('SpeechEnhancement.process is a placeholder'); + return { + enhancedSamples: audioSamples, // Just return the original samples for now + sampleRate: sampleRate + }; + }, + + free: function() { + console.warn('SpeechEnhancement.free is a placeholder'); + } + }; + } + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-combined-kws.js b/wasm/combined/sherpa-onnx-combined-kws.js new file mode 100644 index 0000000000..5f93a19625 --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined-kws.js @@ -0,0 +1,658 @@ +/** + * sherpa-onnx-kws.js + * + * Keyword Spotting functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. 
Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing KWS namespace + SherpaOnnx.KWS = SherpaOnnx.KWS || {}; + + // Define the KWS module functionality + SherpaOnnx.KWS = { + /** + * Load a KWS model from URLs + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const modelDir = modelConfig.modelDir || 'kws-models'; + const debug = modelConfig.debug || false; + + try { + global.Module.FS.mkdir(modelDir, 0o777); + } catch(e) { + if (e.code !== 'EEXIST') throw e; + } + + if (debug) console.log(`Loading KWS model files to ${modelDir}`); + + // Load model files and store the actual paths + const actualPaths = {}; + + // Load encoder + const encoderResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.encoder || 'assets/kws/encoder.onnx', + `${modelDir}/encoder.onnx`, + debug + ); + actualPaths.encoder = encoderResult.path || `${modelDir}/encoder.onnx`; + if (debug) console.log(`Loaded encoder to ${actualPaths.encoder}`); + + // Load decoder + const decoderResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.decoder || 'assets/kws/decoder.onnx', + `${modelDir}/decoder.onnx`, + debug + ); + actualPaths.decoder = decoderResult.path || `${modelDir}/decoder.onnx`; + if (debug) console.log(`Loaded decoder to ${actualPaths.decoder}`); + + // Load joiner + const joinerResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.joiner || 'assets/kws/joiner.onnx', + `${modelDir}/joiner.onnx`, + debug + ); + actualPaths.joiner = joinerResult.path || `${modelDir}/joiner.onnx`; + if (debug) console.log(`Loaded joiner to ${actualPaths.joiner}`); + + // Load tokens file + const tokensResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.tokens || 'assets/kws/tokens.txt', + `${modelDir}/tokens.txt`, + debug + ); + actualPaths.tokens = tokensResult.path || `${modelDir}/tokens.txt`; + if (debug) console.log(`Loaded tokens to ${actualPaths.tokens}`); + + // Load the tokens content for validation + try { + const tokensContent = global.Module.FS.readFile(actualPaths.tokens, { encoding: 'utf8' }); + actualPaths.tokensMap = this.parseTokensFile(tokensContent); + if (debug) console.log(`Parsed ${Object.keys(actualPaths.tokensMap).length} tokens`); + } catch (e) { + console.error(`Failed to read tokens file: ${e.message}`); + actualPaths.tokensMap = null; + } + + // Load keywords file if provided + if (modelConfig.keywordsFile) { + const keywordsResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.keywordsFile, + `${modelDir}/keywords.txt`, + debug + ); + actualPaths.keywordsFile = keywordsResult.path || `${modelDir}/keywords.txt`; + if (debug) console.log(`Loaded keywords file to ${actualPaths.keywordsFile}`); + } + + return { + modelDir: modelDir, + paths: actualPaths + }; + }, + + /** + * Parse the tokens file to create a map of valid tokens + * @param {string} content - The content of the tokens file + * @returns {Object} - Map of tokens to their IDs + */ + parseTokensFile: function(content) { + const tokensMap = {}; + const lines = content.split('\n'); + + for (const line of lines) { + const parts = line.trim().split(' '); + if (parts.length >= 2) { + const token = parts[0]; + const id = parseInt(parts[1]); + if (!isNaN(id)) { + tokensMap[token] = id; + } + } + } + + return tokensMap; + }, + + /** + * Validate keywords 
against available tokens + * @param {string} keywords - The keywords to validate + * @param {Object} tokensMap - Map of valid tokens + * @returns {Object} - Validation result with formatted keywords + */ + validateKeywords: function(keywords, tokensMap) { + if (!tokensMap) return { valid: false, message: 'No tokens available for validation' }; + + const lines = keywords.split('\n'); + const validatedLines = []; + const invalidTokens = new Set(); + let isValid = true; + + for (const line of lines) { + // Skip empty lines + if (!line.trim()) continue; + + const parts = line.trim().split('@'); + let phonetic = parts[0].trim(); + const label = parts.length > 1 ? parts[1].trim() : phonetic; + + // Validate each token in the phonetic representation + const tokens = phonetic.split(' ').filter(t => t); + const validTokens = []; + + for (const token of tokens) { + if (token in tokensMap) { + validTokens.push(token); + } else { + invalidTokens.add(token); + isValid = false; + } + } + + const validatedLine = validTokens.join(' ') + ' @' + label; + validatedLines.push(validatedLine); + } + + return { + valid: isValid, + formattedKeywords: validatedLines.join('\n'), + invalidTokens: [...invalidTokens], + message: isValid ? 'All keywords are valid' : + `Invalid tokens: ${[...invalidTokens].join(', ')}` + }; + }, + + /** + * Create a Keyword Spotter with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {KeywordSpotter} - An instance of KeywordSpotter + */ + createKeywordSpotter: function(loadedModel, options = {}) { + const debug = options.debug || false; + + // Create transducer configuration using actual paths + const transducerConfig = { + encoder: loadedModel.paths.encoder, + decoder: loadedModel.paths.decoder, + joiner: loadedModel.paths.joiner, + }; + + // Create model configuration + const modelConfig = { + transducer: transducerConfig, + tokens: loadedModel.paths.tokens, + provider: options.provider || 'cpu', + numThreads: options.numThreads || 1, + debug: options.debug !== undefined ? 
options.debug : 1, + }; + + // Create feature configuration + const featConfig = { + samplingRate: options.sampleRate || 16000, + featureDim: options.featureDim || 80, + }; + + // First, create a keywords.txt file in the same directory as the tokens file + const tokensPath = loadedModel.paths.tokens; + const tokensDir = tokensPath.substring(0, tokensPath.lastIndexOf('/')); + const keywordsPath = `${tokensDir}/keywords.txt`; + + // Default keywords as individual characters + let defaultKeywords = + "h e l l o @Hello\n" + + "c o m p u t e r @Computer\n" + + "a l e x a @Alexa"; + + // Use provided keywords or default, then validate + let keywordsContent = options.keywords || defaultKeywords; + + // Validate the keywords against the tokens map if available + if (loadedModel.paths.tokensMap) { + const validationResult = this.validateKeywords(keywordsContent, loadedModel.paths.tokensMap); + + if (!validationResult.valid) { + console.warn(`Keyword validation failed: ${validationResult.message}`); + console.warn('Using only valid tokens for keywords'); + } + + keywordsContent = validationResult.formattedKeywords; + + if (debug) { + console.log(`Validation result:`, validationResult); + } + } + + try { + // Make sure file exists with absolute path + global.Module.FS.writeFile(keywordsPath, keywordsContent); + console.log(`Created keywords file at: ${keywordsPath}`); + + if (debug) { + console.log(`Keywords content: ${keywordsContent}`); + } + + // Verify the file is created + try { + const stat = global.Module.FS.stat(keywordsPath); + if (debug) console.log(`Keywords file exists, size: ${stat.size} bytes`); + } catch (e) { + console.error(`Failed to verify keywords file at ${keywordsPath}:`, e); + } + } catch (e) { + console.error('Failed to write keywords file:', e); + } + + // Create the KWS configuration + const configObj = { + featConfig: featConfig, + modelConfig: modelConfig, + maxActivePaths: options.maxActivePaths || 4, + numTrailingBlanks: options.numTrailingBlanks || 1, + keywordsScore: options.keywordsScore || 1.0, + keywordsThreshold: options.keywordsThreshold || 0.25, + keywordsFile: keywordsPath + }; + + if (debug) { + console.log('KWS Configuration:', JSON.stringify(configObj, null, 2)); + } + + // Create the KWS instance using the global createKws helper + if (typeof createKws === 'function') { + return createKws(global.Module, configObj); + } + + // Fall back to our implementation if global function not available + return new global.Kws(configObj, global.Module); + } + }; + + /** + * Wrapper for Stream class + */ + global.Stream = global.Stream || function(handle, Module) { + this.handle = handle; + this.Module = Module; + this.pointer = null; + this.n = 0; + + /** + * Free the stream + */ + this.free = function() { + if (this.handle) { + this.Module._SherpaOnnxDestroyOnlineStream(this.handle); + this.handle = null; + if (this.pointer) { + this.Module._free(this.pointer); + this.pointer = null; + this.n = 0; + } + } + }; + + /** + * Accept audio waveform data + * @param {number} sampleRate - Sample rate of the audio + * @param {Float32Array} samples - Audio samples in [-1, 1] range + */ + this.acceptWaveform = function(sampleRate, samples) { + if (this.n < samples.length) { + if (this.pointer) { + this.Module._free(this.pointer); + } + this.pointer = this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.n = samples.length; + } + + this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); + this.Module._SherpaOnnxOnlineStreamAcceptWaveform( + 
this.handle, sampleRate, this.pointer, samples.length); + }; + + /** + * Signal that input is finished + */ + this.inputFinished = function() { + this.Module._SherpaOnnxOnlineStreamInputFinished(this.handle); + }; + }; + + /** + * KeywordSpotter class + */ + global.Kws = global.Kws || function(configObj, Module) { + this.config = configObj; + + // Initialize the configuration + const config = initKwsConfig(configObj, Module); + const handle = Module._SherpaOnnxCreateKeywordSpotter(config.ptr); + + // Free the configuration + freeConfig(config, Module); + + this.handle = handle; + this.Module = Module; + + /** + * Free the keyword spotter + */ + this.free = function() { + this.Module._SherpaOnnxDestroyKeywordSpotter(this.handle); + this.handle = 0; + }; + + /** + * Create a stream for keyword spotting + * @returns {Stream} - A new stream for keyword spotting + */ + this.createStream = function() { + const handle = this.Module._SherpaOnnxCreateKeywordStream(this.handle); + return new global.Stream(handle, this.Module); + }; + + /** + * Check if the stream is ready for decoding + * @param {Stream} stream - The stream to check + * @returns {boolean} - True if ready, false otherwise + */ + this.isReady = function(stream) { + return this.Module._SherpaOnnxIsKeywordStreamReady( + this.handle, stream.handle) === 1; + }; + + /** + * Decode the audio in the stream for keyword spotting + * @param {Stream} stream - The stream to decode + */ + this.decode = function(stream) { + this.Module._SherpaOnnxDecodeKeywordStream(this.handle, stream.handle); + }; + + /** + * Reset the stream after keyword detection + * @param {Stream} stream - The stream to reset + */ + this.reset = function(stream) { + this.Module._SherpaOnnxResetKeywordStream(this.handle, stream.handle); + }; + + /** + * Get the keyword spotting result + * @param {Stream} stream - The stream to get results from + * @returns {Object} - Keyword spotting result as JSON + */ + this.getResult = function(stream) { + const r = this.Module._SherpaOnnxGetKeywordResult(this.handle, stream.handle); + const jsonPtr = this.Module.getValue(r + 24, 'i8*'); + const json = this.Module.UTF8ToString(jsonPtr); + this.Module._SherpaOnnxDestroyKeywordResult(r); + return JSON.parse(json); + }; + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? 
window : global); + +/** + * Initialize feature extractor configuration + */ +function initFeatureExtractorConfig(config, Module) { + const ptr = Module._malloc(4 * 2); + Module.setValue(ptr, config.samplingRate || 16000, 'i32'); + Module.setValue(ptr + 4, config.featureDim || 80, 'i32'); + return { + ptr: ptr, + len: 8, + }; +} + +/** + * Initialize transducer model configuration + */ +function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { + const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1; + const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1; + const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1; + + const n = encoderLen + decoderLen + joinerLen; + const buffer = Module._malloc(n); + + const len = 3 * 4; // 3 pointers + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); + offset += encoderLen; + + Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); + offset += decoderLen; + + Module.stringToUTF8(config.joiner, buffer + offset, joinerLen); + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += encoderLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += decoderLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + + return { + buffer: buffer, + ptr: ptr, + len: len, + }; +} + +/** + * Initialize model configuration + */ +function initModelConfig(config, Module) { + if (!('tokensBuf' in config)) { + config.tokensBuf = ''; + } + + if (!('tokensBufSize' in config)) { + config.tokensBufSize = 0; + } + + const transducer = initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); + const paraformer_len = 2 * 4; + const ctc_len = 1 * 4; + + const len = transducer.len + paraformer_len + ctc_len + 9 * 4; + const ptr = Module._malloc(len); + Module.HEAPU8.fill(0, ptr, ptr + len); + + let offset = 0; + Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset); + + const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const buffer = Module._malloc(tokensLen + providerLen); + + offset = 0; + Module.stringToUTF8(config.tokens, buffer, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen); + + offset = transducer.len + paraformer_len + ctc_len; + Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.debug, 'i32'); + + return { + buffer: buffer, + ptr: ptr, + len: len, + transducer: transducer + }; +} + +/** + * Initialize KWS configuration + */ +function initKwsConfig(config, Module) { + if (!('featConfig' in config)) { + config.featConfig = { + samplingRate: 16000, + featureDim: 80, + }; + } + + if (!('keywordsBuf' in config)) { + config.keywordsBuf = ''; + } + + if (!('keywordsBufSize' in config)) { + config.keywordsBufSize = 0; + } + + const featConfig = initFeatureExtractorConfig(config.featConfig, Module); + const modelConfig = initModelConfig(config.modelConfig, Module); + const numBytes = featConfig.len + modelConfig.len + 4 * 7; + + const ptr = Module._malloc(numBytes); + let offset = 0; + Module._CopyHeap(featConfig.ptr, featConfig.len, ptr + offset); + offset += featConfig.len; + + Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); + offset += 
modelConfig.len; + + Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.numTrailingBlanks || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.keywordsScore || 1.0, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.keywordsThreshold || 0.25, 'float'); + offset += 4; + + // Handle keywords file + let keywordsFileBuffer = 0; + if (config.keywordsFile) { + const keywordsFileLen = Module.lengthBytesUTF8(config.keywordsFile) + 1; + keywordsFileBuffer = Module._malloc(keywordsFileLen); + Module.stringToUTF8(config.keywordsFile, keywordsFileBuffer, keywordsFileLen); + } + + // Set keywords_file + Module.setValue(ptr + offset, keywordsFileBuffer, 'i8*'); + offset += 4; + + // Set keywords_buf to 0 - we're using a file instead + Module.setValue(ptr + offset, 0, 'i8*'); + offset += 4; + + // Set keywords_buf_size to 0 + Module.setValue(ptr + offset, 0, 'i32'); + offset += 4; + + return { + ptr: ptr, + len: numBytes, + featConfig: featConfig, + modelConfig: modelConfig, + keywordsFileBuffer: keywordsFileBuffer + }; +} + +/** + * Free configuration memory + */ +function freeConfig(config, Module) { + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('transducer' in config) { + freeConfig(config.transducer, Module); + } + + if ('featConfig' in config) { + freeConfig(config.featConfig, Module); + } + + if ('modelConfig' in config) { + freeConfig(config.modelConfig, Module); + } + + if ('keywordsFileBuffer' in config && config.keywordsFileBuffer) { + Module._free(config.keywordsFileBuffer); + } + + Module._free(config.ptr); +} + +/** + * Global helper function to create a Kws instance + */ +function createKws(Module, myConfig) { + let transducerConfig = { + encoder: './encoder-epoch-12-avg-2-chunk-16-left-64.onnx', + decoder: './decoder-epoch-12-avg-2-chunk-16-left-64.onnx', + joiner: './joiner-epoch-12-avg-2-chunk-16-left-64.onnx', + }; + + let modelConfig = { + transducer: transducerConfig, + tokens: './tokens.txt', + provider: 'cpu', + modelType: '', + numThreads: 1, + debug: 1, + modelingUnit: 'cjkchar', + bpeVocab: '', + }; + + let featConfig = { + samplingRate: 16000, + featureDim: 80, + }; + + let configObj = { + featConfig: featConfig, + modelConfig: modelConfig, + maxActivePaths: 4, + numTrailingBlanks: 1, + keywordsScore: 1.0, + keywordsThreshold: 0.25, + // Use keywordsFile instead of keywords + keywordsFile: './keywords.txt' + }; + + if (myConfig) { + configObj = myConfig; + } + return new Kws(configObj, Module); +} \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-combined-speaker.js b/wasm/combined/sherpa-onnx-combined-speaker.js new file mode 100644 index 0000000000..5153983768 --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined-speaker.js @@ -0,0 +1,110 @@ +/** + * sherpa-onnx-speaker.js + * + * Speaker Diarization functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. 
Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing SpeakerDiarization namespace + SherpaOnnx.SpeakerDiarization = SherpaOnnx.SpeakerDiarization || {}; + + // Define the SpeakerDiarization module functionality + SherpaOnnx.SpeakerDiarization = { + /** + * Load Speaker Diarization models from URLs + * @param {Object} modelConfig - Configuration for the models + * @returns {Promise} - Information about the loaded models + */ + loadModel: async function(modelConfig) { + const modelDir = modelConfig.modelDir || 'speaker-diarization-models'; + + try { + global.Module.FS.mkdir(modelDir, 0o777); + } catch(e) { + if (e.code !== 'EEXIST') throw e; + } + + // Load segmentation and embedding models + await Promise.all([ + SherpaOnnx.FileSystem.loadFile(modelConfig.segmentation || 'assets/speakers/segmentation.onnx', `${modelDir}/segmentation.onnx`), + SherpaOnnx.FileSystem.loadFile(modelConfig.embedding || 'assets/speakers/embedding.onnx', `${modelDir}/embedding.onnx`) + ]); + + return { + modelDir: modelDir + }; + }, + + /** + * Create a Speaker Diarization instance with the loaded models + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {SpeakerDiarization} - A Speaker Diarization instance + */ + createSpeakerDiarization: function(loadedModel, options = {}) { + // This is a placeholder for actual implementation + // In a real implementation, you would create the configuration + // and pass it to the WASM module + + const config = { + segmentation: { + pyannote: { + model: `${loadedModel.modelDir}/segmentation.onnx` + }, + numThreads: options.numThreads || 1, + debug: options.debug !== undefined ? options.debug : 1, + provider: options.provider || 'cpu' + }, + embedding: { + model: `${loadedModel.modelDir}/embedding.onnx`, + numThreads: options.numThreads || 1, + debug: options.debug !== undefined ? options.debug : 1, + provider: options.provider || 'cpu' + }, + clustering: { + numClusters: options.numClusters || 0, // 0 means auto-detect + threshold: options.threshold || 0.8 + }, + minDurationOn: options.minDurationOn || 0.5, + minDurationOff: options.minDurationOff || 0.5 + }; + + // In a real implementation, you would create and return an instance + // of a SpeakerDiarization class + + console.warn('Speaker Diarization implementation is not fully functional yet'); + + // Placeholder for the actual implementation + return { + config: config, + + // Placeholder methods that would normally interact with the WASM module + process: function(audioSamples, sampleRate) { + console.warn('SpeakerDiarization.process is a placeholder'); + return { + segments: [] + }; + }, + + free: function() { + console.warn('SpeakerDiarization.free is a placeholder'); + } + }; + } + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? 
window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-combined-tts.js b/wasm/combined/sherpa-onnx-combined-tts.js new file mode 100644 index 0000000000..d6f04aa606 --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined-tts.js @@ -0,0 +1,1100 @@ +/** + * sherpa-onnx-tts.js + * + * Text-to-Speech functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing TTS namespace + SherpaOnnx.TTS = SherpaOnnx.TTS || {}; + + // Define the TTS module functionality + SherpaOnnx.TTS = { + /** + * Load a Text-to-Speech model + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const debug = modelConfig.debug || false; + if (debug) console.log("TTS.loadModel: ModelConfig:", JSON.stringify(modelConfig)); + + // Handle custom model upload case + if (modelConfig.customModel) { + if (debug) console.log("Using custom uploaded model"); + + // Validate basic requirements + if (!modelConfig.customModel.model && !modelConfig.customModel.acousticModel) { + throw new Error("Missing required model file in custom model"); + } + + if (!modelConfig.customModel.tokens) { + throw new Error("Missing required tokens.txt file in custom model"); + } + + return { + modelDir: modelConfig.customModel.dataDir || + (modelConfig.customModel.model + ? modelConfig.customModel.model.split('/').slice(0, -1).join('/') + : modelConfig.customModel.acousticModel.split('/').slice(0, -1).join('/')), + modelType: modelConfig.modelType || 'vits', + actualPaths: modelConfig.customModel, + preloaded: false, + options: modelConfig.options || {} + }; + } + + // Default model directory and type handling + const modelDir = modelConfig.modelDir || 'tts-models'; + const modelType = modelConfig.modelType || 'vits'; + + // First check for preloaded assets + if (!modelConfig.forceDownload) { + const assetPath = SherpaOnnx.Config.assetPaths.tts; + if (debug) console.log(`Checking for preloaded TTS assets at ${assetPath}`); + + if (SherpaOnnx.FileSystem.fileExists(assetPath)) { + const files = SherpaOnnx.FileSystem.listFiles(assetPath); + if (debug) console.log(`Found preloaded files: ${files.join(', ')}`); + + // Check for required model files based on type + let hasRequiredFiles = false; + const actualPaths = {}; + + if (modelType === 'vits') { + // VITS model requires model, lexicon, and tokens files + const modelFile = files.find(f => f.endsWith('.onnx')); + const tokensFile = files.find(f => f === 'tokens.txt'); + + // Check for espeak data directory or zip + let hasEspeakData = files.find(f => f === 'espeak-ng-data' || f === 'espeak-ng-data.zip'); + + if (modelFile && tokensFile) { + hasRequiredFiles = true; + actualPaths.model = `${assetPath}/${modelFile}`; + actualPaths.tokens = `${assetPath}/${tokensFile}`; + + // Add espeak data if found + if (hasEspeakData) { + if (hasEspeakData === 'espeak-ng-data') { + actualPaths.dataDir = `${assetPath}/espeak-ng-data`; + } else { + // Will need to extract this later + actualPaths.espeakZip = `${assetPath}/espeak-ng-data.zip`; + } + } + } + } + + if (hasRequiredFiles) { + if (debug) console.log("Using preloaded TTS model with 
paths:", actualPaths); + return { + modelDir: assetPath, + modelType, + actualPaths, + preloaded: true, + options: modelConfig.options || {} + }; + } + + if (debug) console.log("Preloaded TTS assets found but missing required files"); + } else if (debug) { + console.log(`Asset path ${assetPath} not found, will need to download models`); + } + + // Also check alternative locations for preloaded assets + const alternativePaths = [ + `/sherpa_assets/tts`, + `/assets/tts`, + `/preloaded/tts` + ]; + + for (const altPath of alternativePaths) { + if (altPath === assetPath) continue; // Skip if we've already checked this path + + if (debug) console.log(`Checking alternative path: ${altPath}`); + + if (SherpaOnnx.FileSystem.fileExists(altPath)) { + const files = SherpaOnnx.FileSystem.listFiles(altPath); + if (debug) console.log(`Found files at ${altPath}: ${files.join(', ')}`); + + // Similar check for required files + let hasRequiredFiles = false; + const actualPaths = {}; + + if (modelType === 'vits') { + const modelFile = files.find(f => f.endsWith('.onnx')); + const tokensFile = files.find(f => f === 'tokens.txt'); + + // Check for espeak data directory or zip + let hasEspeakData = files.find(f => f === 'espeak-ng-data' || f === 'espeak-ng-data.zip'); + + if (modelFile && tokensFile) { + hasRequiredFiles = true; + actualPaths.model = `${altPath}/${modelFile}`; + actualPaths.tokens = `${altPath}/${tokensFile}`; + + // Add espeak data if found + if (hasEspeakData) { + if (hasEspeakData === 'espeak-ng-data') { + actualPaths.dataDir = `${altPath}/espeak-ng-data`; + } else { + // Will need to extract this later + actualPaths.espeakZip = `${altPath}/espeak-ng-data.zip`; + } + } + } + } + + if (hasRequiredFiles) { + if (debug) console.log(`Using alternative preloaded TTS model path: ${altPath}`); + // Update the config to use this path in the future + SherpaOnnx.Config.assetPaths.tts = altPath; + + return { + modelDir: altPath, + modelType, + actualPaths, + preloaded: true, + options: modelConfig.options || {} + }; + } + } + } + } + + // If we reached here, we couldn't find preloaded assets + throw new Error("No preloaded TTS model found and dynamic loading is not implemented"); + }, + + /** + * Create a TTS engine with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {OfflineTts} - An instance of OfflineTts + */ + createOfflineTts: function(loadedModel, options = {}) { + const debug = options.debug !== undefined ? 
options.debug : false; + + if (debug) { + console.log("Creating TTS engine with loaded model:", loadedModel); + } + + // Always use a single consistent property name for model type + const modelType = loadedModel.modelType || 'vits'; + + if (debug) { + console.log(`Using model type: ${modelType}`); + } + + // Merge options from loadedModel with function options, prioritizing function options + const mergedOptions = { + ...loadedModel.options, + ...options + }; + + if (debug) { + console.log("Using merged options:", mergedOptions); + } + + let config = null; + + if (modelType === 'vits') { + // For preloaded assets, we use actualPaths + const paths = loadedModel.actualPaths || loadedModel.files || {}; + + if (debug) { + console.log("Using model paths:", paths); + } + + if (!paths.model || !paths.tokens) { + throw new Error("Missing required files for VITS model configuration"); + } + + const offlineTtsVitsModelConfig = { + model: paths.model, + lexicon: paths.lexicon || '', + tokens: paths.tokens, + dataDir: paths.dataDir || `${loadedModel.modelDir}/espeak-ng-data`, // Path to espeak-ng-data in model directory + dictDir: paths.dictDir || '', + noiseScale: mergedOptions.noiseScale || 0.667, + noiseScaleW: mergedOptions.noiseScaleW || 0.8, + lengthScale: mergedOptions.lengthScale || 1.0, + }; + + if (debug) { + console.log("VITS model config:", offlineTtsVitsModelConfig); + } + + const offlineTtsMatchaModelConfig = { + acousticModel: paths.acousticModel || '', + vocoder: paths.vocoder || '', + lexicon: paths.lexicon || '', + tokens: paths.tokens || '', + dataDir: paths.dataDir || '', + dictDir: paths.dictDir || '', + noiseScale: mergedOptions.noiseScale || 0.667, + lengthScale: mergedOptions.lengthScale || 1.0, + }; + + const offlineTtsKokoroModelConfig = { + model: paths.model || '', + voices: paths.voices || '', + tokens: paths.tokens || '', + dataDir: paths.dataDir || '', + lengthScale: mergedOptions.lengthScale || 1.0, + dictDir: paths.dictDir || '', + lexicon: paths.lexicon || '', + }; + + // Use the correct field names expected by the C API + const offlineTtsModelConfig = { + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig, + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, + numThreads: mergedOptions.numThreads || 1, + debug: debug ? 1 : 0, + provider: mergedOptions.provider || 'cpu', + }; + + config = { + offlineTtsModelConfig: offlineTtsModelConfig, + ruleFsts: mergedOptions.ruleFsts || '', + ruleFars: mergedOptions.ruleFars || '', + maxNumSentences: mergedOptions.maxNumSentences || 1, + silenceScale: mergedOptions.silenceScale || 1.0 + }; + } else if (modelType === 'matcha') { + // Similar configuration for matcha... 
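+ // Matcha requires both an acoustic model and a vocoder in addition to tokens.
+ // The full model config always carries all three sub-configs (see initSherpaOnnxOfflineTtsModelConfig below),
+ // so the unused VITS and Kokoro entries are passed as empty strings.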
+ const paths = loadedModel.actualPaths || loadedModel.files || {}; + + if (!paths.acousticModel || !paths.vocoder || !paths.tokens) { + throw new Error("Missing required files for Matcha model configuration"); + } + + const offlineTtsVitsModelConfig = { + model: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }; + + const offlineTtsMatchaModelConfig = { + acousticModel: paths.acousticModel, + vocoder: paths.vocoder, + lexicon: paths.lexicon || '', + tokens: paths.tokens, + dataDir: paths.dataDir || '', + dictDir: paths.dictDir || '', + noiseScale: mergedOptions.noiseScale || 0.667, + lengthScale: mergedOptions.lengthScale || 1.0, + }; + + const offlineTtsKokoroModelConfig = { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }; + + const offlineTtsModelConfig = { + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig, + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, + numThreads: mergedOptions.numThreads || 1, + debug: debug ? 1 : 0, + provider: mergedOptions.provider || 'cpu', + }; + + config = { + offlineTtsModelConfig: offlineTtsModelConfig, + ruleFsts: mergedOptions.ruleFsts || '', + ruleFars: mergedOptions.ruleFars || '', + maxNumSentences: mergedOptions.maxNumSentences || 1, + silenceScale: mergedOptions.silenceScale || 1.0 + }; + } else if (modelType === 'kokoro') { + // Similar configuration for kokoro... + const paths = loadedModel.actualPaths || loadedModel.files || {}; + + if (!paths.model || !paths.voices || !paths.tokens) { + throw new Error("Missing required files for Kokoro model configuration"); + } + + const offlineTtsVitsModelConfig = { + model: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }; + + const offlineTtsMatchaModelConfig = { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }; + + const offlineTtsKokoroModelConfig = { + model: paths.model, + voices: paths.voices, + tokens: paths.tokens, + dataDir: paths.dataDir || '', + lengthScale: mergedOptions.lengthScale || 1.0, + dictDir: paths.dictDir || '', + lexicon: paths.lexicon || '', + }; + + const offlineTtsModelConfig = { + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig, + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, + numThreads: mergedOptions.numThreads || 1, + debug: debug ? 
1 : 0, + provider: mergedOptions.provider || 'cpu', + }; + + config = { + offlineTtsModelConfig: offlineTtsModelConfig, + ruleFsts: mergedOptions.ruleFsts || '', + ruleFars: mergedOptions.ruleFars || '', + maxNumSentences: mergedOptions.maxNumSentences || 1, + silenceScale: mergedOptions.silenceScale || 1.0 + }; + } else { + throw new Error(`Unsupported TTS model type: ${modelType}`); + } + + if (debug) { + console.log("Final TTS configuration:", JSON.stringify(config)); + } + + try { + // Create the offline TTS object + const tts = this.createOfflineTtsInternal(config, global.Module); + + // Track the resource for cleanup if tracking function is available + if (SherpaOnnx.trackResource) { + SherpaOnnx.trackResource('tts', tts); + } + + return tts; + } catch (error) { + console.error("Error creating TTS engine:", error); + throw error; + } + }, + + /** + * Internal function to create an offline TTS engine + * Following the reference implementation pattern + */ + createOfflineTtsInternal: function(config, Module) { + if (!config) { + console.error("TTS configuration is null or undefined"); + return null; + } + + if (typeof createOfflineTts === 'function') { + // Use the global createOfflineTts function if available + return createOfflineTts(Module, config); + } + + // Otherwise use our own implementation + return new global.OfflineTts(config, Module); + } + }; + + /** + * Free configuration memory allocated in WASM + * @param {Object} config - Configuration object with allocated memory + * @param {Object} Module - WebAssembly module + * @private + */ + function freeConfig(config, Module) { + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('config' in config) { + freeConfig(config.config, Module); + } + + if ('matcha' in config) { + freeConfig(config.matcha, Module); + } + + if ('kokoro' in config) { + freeConfig(config.kokoro, Module); + } + + if (config.ptr) { + Module._free(config.ptr); + } + } + + /** + * Initialize VITS model configuration + * @param {Object} config - VITS configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + + const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen; + const buffer = Module._malloc(n); + + const len = 8 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); + offset += dataDirLen; + + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += modelLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += lexiconLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + offset += tokensLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); + offset += 
dataDirLen; + + Module.setValue(ptr + 16, config.noiseScale || 0.667, 'float'); + Module.setValue(ptr + 20, config.noiseScaleW || 0.8, 'float'); + Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float'); + Module.setValue(ptr + 28, buffer + offset, 'i8*'); + offset += dictDirLen; + + return { + buffer: buffer, ptr: ptr, len: len, + }; + } + + /** + * Initialize Matcha model configuration + * @param {Object} config - Matcha configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsMatchaModelConfig(config, Module) { + const acousticModelLen = Module.lengthBytesUTF8(config.acousticModel || '') + 1; + const vocoderLen = Module.lengthBytesUTF8(config.vocoder || '') + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + + const n = acousticModelLen + vocoderLen + lexiconLen + tokensLen + + dataDirLen + dictDirLen; + + const buffer = Module._malloc(n); + const len = 8 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8( + config.acousticModel || '', buffer + offset, acousticModelLen); + offset += acousticModelLen; + + Module.stringToUTF8(config.vocoder || '', buffer + offset, vocoderLen); + offset += vocoderLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); + offset += dataDirLen; + + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += acousticModelLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += vocoderLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + offset += lexiconLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); + offset += tokensLen; + + Module.setValue(ptr + 16, buffer + offset, 'i8*'); + offset += dataDirLen; + + Module.setValue(ptr + 20, config.noiseScale || 0.667, 'float'); + Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float'); + Module.setValue(ptr + 28, buffer + offset, 'i8*'); + offset += dictDirLen; + + return { + buffer: buffer, ptr: ptr, len: len, + }; + } + + /** + * Initialize Kokoro model configuration + * @param {Object} config - Kokoro configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + const voicesLen = Module.lengthBytesUTF8(config.voices || '') + 1; + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; + + const n = modelLen + voicesLen + tokensLen + dataDirLen + dictDirLen + lexiconLen; + const buffer = Module._malloc(n); + + const len = 7 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + 
Module.stringToUTF8(config.voices || '', buffer + offset, voicesLen); + offset += voicesLen; + + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); + offset += dataDirLen; + + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += modelLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += voicesLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + offset += tokensLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); + offset += dataDirLen; + + Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float'); + + Module.setValue(ptr + 20, buffer + offset, 'i8*'); + offset += dictDirLen; + + Module.setValue(ptr + 24, buffer + offset, 'i8*'); + offset += lexiconLen; + + return { + buffer: buffer, ptr: ptr, len: len, + }; + } + + /** + * Initialize offline TTS model configuration + * @param {Object} config - Model configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsModelConfig(config, Module) { + if (Module.debug) { + console.log("Initializing offline TTS model config:", JSON.stringify(config)); + } + + // Get configurations, supporting both old and new formats + const vitsConfig = config.vits || config.offlineTtsVitsModelConfig || { + model: './model.onnx', + lexicon: '', + tokens: './tokens.txt', + dataDir: './espeak-ng-data', // Use relative path in the model directory + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }; + + const matchaConfig = config.matcha || config.offlineTtsMatchaModelConfig || { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }; + + const kokoroConfig = config.kokoro || config.offlineTtsKokoroModelConfig || { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }; + + const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig( + vitsConfig, Module); + + const matchaModelConfig = initSherpaOnnxOfflineTtsMatchaModelConfig( + matchaConfig, Module); + + const kokoroModelConfig = initSherpaOnnxOfflineTtsKokoroModelConfig( + kokoroConfig, Module); + + const len = vitsModelConfig.len + matchaModelConfig.len + + kokoroModelConfig.len + 3 * 4; + + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); + offset += vitsModelConfig.len; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.debug || 0, 'i32'); + offset += 4; + + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const buffer = Module._malloc(providerLen); + Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen); + Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module._CopyHeap(matchaModelConfig.ptr, matchaModelConfig.len, ptr + offset); + offset += matchaModelConfig.len; + + Module._CopyHeap(kokoroModelConfig.ptr, kokoroModelConfig.len, ptr + offset); + offset += kokoroModelConfig.len; + + return { + buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig, + matcha: matchaModelConfig, 
kokoro: kokoroModelConfig, + }; + } + + /** + * Initialize the TTS configuration + * @param {Object} config - TTS configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsConfig(config, Module) { + // Log for debugging + if (Module.debug) { + console.log("Initializing TTS config:", JSON.stringify(config)); + } + + // Make sure we have an offlineTtsModelConfig + if (!config.offlineTtsModelConfig) { + if (Module.debug) { + console.log("No offlineTtsModelConfig found, creating default"); + } + + // Use provided defaults or create new ones + config.offlineTtsModelConfig = { + offlineTtsVitsModelConfig: { + model: './model.onnx', + lexicon: '', + tokens: './tokens.txt', + dataDir: './espeak-ng-data', + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }, + offlineTtsMatchaModelConfig: { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }, + offlineTtsKokoroModelConfig: { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }, + numThreads: 1, + debug: Module.debug ? 1 : 0, + provider: 'cpu', + }; + } + + // Initialize model config + const initializedModelConfig = + initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module); + + const len = initializedModelConfig.len + 4 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(initializedModelConfig.ptr, initializedModelConfig.len, ptr + offset); + offset += initializedModelConfig.len; + + const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; + const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; + + const buffer = Module._malloc(ruleFstsLen + ruleFarsLen); + Module.stringToUTF8(config.ruleFsts || '', buffer, ruleFstsLen); + Module.stringToUTF8(config.ruleFars || '', buffer + ruleFstsLen, ruleFarsLen); + + Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.maxNumSentences || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.silenceScale || 1.0, 'float'); + offset += 4; + + return { + buffer: buffer, ptr: ptr, len: len, config: initializedModelConfig, + }; + } + + /** + * OfflineTts class for text-to-speech synthesis + */ + global.OfflineTts = global.OfflineTts || function(configObj, Module) { + if (!Module) { + throw new Error("WASM Module is required for OfflineTts"); + } + + this.Module = Module; + this.handle = null; + this.sampleRate = 0; + this.numSpeakers = 0; + this.generatedAudios = []; // Track generated audios for cleanup + + const debug = Module.debug || (configObj && configObj.debug); + + if (debug) { + console.log("Creating OfflineTts with config:", JSON.stringify(configObj)); + } + + try { + // Initialize the TTS configuration + const config = initSherpaOnnxOfflineTtsConfig(configObj, Module); + + if (debug) { + try { + Module._MyPrintTTS(config.ptr); + } catch (e) { + console.warn("Failed to print TTS config:", e); + } + } + + // Create the TTS engine + const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr); + + if (!handle || handle === 0) { + const error = new Error("Failed to create TTS engine - null handle returned"); + freeConfig(config, Module); + throw error; + } + + // Free the configuration memory now that we have the handle + 
freeConfig(config, Module); + + // Store the handle and get basic information about the TTS engine + this.handle = handle; + + try { + this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle); + this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle); + + if (debug) { + console.log(`TTS engine initialized. Sample rate: ${this.sampleRate}Hz, Number of speakers: ${this.numSpeakers}`); + } + } catch (e) { + console.error("Error getting TTS engine information:", e); + // Don't throw here, we can continue with defaults + } + } catch (e) { + // Clean up any resources if initialization failed + if (this.handle) { + try { + Module._SherpaOnnxDestroyOfflineTts(this.handle); + } catch (cleanupError) { + console.error("Error cleaning up after failed initialization:", cleanupError); + } + this.handle = null; + } + + // Re-throw the original error + throw e; + } + + /** + * Generate speech from text + * @param {string} text - Text to synthesize + * @param {number} sid - Speaker ID (0 to numSpeakers-1) + * @param {number} speed - Speed factor (1.0 is normal speed) + * @returns {Object} - Object containing audio samples and sample rate + */ + this.generate = function(text, sid = 0, speed = 1.0) { + if (this.Module.debug) { + console.log(`Generating speech for text: "${text}", sid: ${sid}, speed: ${speed}`); + } + + const textLen = this.Module.lengthBytesUTF8(text) + 1; + const textPtr = this.Module._malloc(textLen); + this.Module.stringToUTF8(text, textPtr, textLen); + + const h = this.Module._SherpaOnnxOfflineTtsGenerate( + this.handle, textPtr, sid, speed); + + this.Module._free(textPtr); + + if (!h || h === 0) { + throw new Error("Failed to generate speech - null pointer returned"); + } + + // Access the generated audio structure + // The structure has this format in C: + // struct SherpaOnnxOfflineTtsGeneratedAudio { + // float *samples; + // int32_t n; + // int32_t sample_rate; + // }; + try { + // Read the number of samples and sample rate from memory + const numSamples = this.Module.getValue(h + 4, 'i32'); + const sampleRate = this.Module.getValue(h + 8, 'i32'); + + if (this.Module.debug) { + console.log(`Generated ${numSamples} samples at ${sampleRate}Hz`); + } + + // Get the pointer to the audio samples array + const samplesPtr = this.Module.getValue(h, '*'); + + if (!samplesPtr) { + throw new Error("Failed to read audio samples pointer"); + } + + // Copy samples to a new Float32Array + const samples = new Float32Array(numSamples); + for (let i = 0; i < numSamples; i++) { + samples[i] = this.Module.getValue(samplesPtr + (i * 4), 'float'); + } + + // Add to our tracking list + this.generatedAudios.push(h); + + return { + samples: samples, + sampleRate: sampleRate, + // Add a cleanup function for this specific audio + free: () => { + const index = this.generatedAudios.indexOf(h); + if (index !== -1) { + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); + this.generatedAudios.splice(index, 1); + } + } + }; + } catch (error) { + // Clean up on error to avoid memory leaks + if (h) { + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); + } + console.error("Error accessing generated audio data:", error); + throw new Error("Failed to process generated audio: " + error.message); + } + }; + + /** + * Save generated audio to a WAV file (for browser environments) + * @param {Float32Array} samples - Audio samples + * @param {number} sampleRate - Sample rate + * @returns {Blob} - WAV file as Blob + */ + this.saveAsWav = function(samples, sampleRate) { + // Create 
WAV file in memory + const numSamples = samples.length; + const dataSize = numSamples * 2; // 16-bit samples + const bufferSize = 44 + dataSize; + + const buffer = new ArrayBuffer(bufferSize); + const view = new DataView(buffer); + + // WAV header (http://soundfile.sapp.org/doc/WaveFormat/) + view.setUint32(0, 0x46464952, true); // 'RIFF' + view.setUint32(4, bufferSize - 8, true); // chunk size + view.setUint32(8, 0x45564157, true); // 'WAVE' + view.setUint32(12, 0x20746d66, true); // 'fmt ' + view.setUint32(16, 16, true); // subchunk1 size + view.setUint16(20, 1, true); // PCM format + view.setUint16(22, 1, true); // mono + view.setUint32(24, sampleRate, true); // sample rate + view.setUint32(28, sampleRate * 2, true); // byte rate + view.setUint16(32, 2, true); // block align + view.setUint16(34, 16, true); // bits per sample + view.setUint32(36, 0x61746164, true); // 'data' + view.setUint32(40, dataSize, true); // subchunk2 size + + // Write audio data + for (let i = 0; i < numSamples; i++) { + // Convert float to 16-bit PCM + let sample = samples[i]; + if (sample > 1.0) sample = 1.0; + if (sample < -1.0) sample = -1.0; + + const pcm = Math.floor(sample * 32767); + view.setInt16(44 + i * 2, pcm, true); + } + + return new Blob([buffer], { type: 'audio/wav' }); + }; + + /** + * Free the TTS engine and all generated audios + */ + this.free = function() { + // Free all generated audios first + for (let i = this.generatedAudios.length - 1; i >= 0; i--) { + if (this.generatedAudios[i]) { + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(this.generatedAudios[i]); + } + } + this.generatedAudios = []; + + // Free the TTS engine + if (this.handle) { + this.Module._SherpaOnnxDestroyOfflineTts(this.handle); + this.handle = 0; + } + }; + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? 
window : global); + +/** + * Global helper function to create an OfflineTts instance + */ +function createOfflineTts(Module, config) { + // Use provided config or create default + if (config) return new OfflineTts(config, Module); + + // Default configuration pointing to extracted espeak-ng-data + const defaultConfig = { + offlineTtsModelConfig: { + offlineTtsVitsModelConfig: { + model: './model.onnx', + lexicon: '', + tokens: './tokens.txt', + dataDir: './espeak-ng-data', // Use relative path in the model directory + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }, + offlineTtsMatchaModelConfig: { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }, + offlineTtsKokoroModelConfig: { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }, + numThreads: 1, + debug: 1, + provider: 'cpu', + }, + ruleFsts: '', + ruleFars: '', + maxNumSentences: 1, + silenceScale: 1.0 + }; + + return new OfflineTts(defaultConfig, Module); +} \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-combined-vad.js b/wasm/combined/sherpa-onnx-combined-vad.js new file mode 100644 index 0000000000..a84046210f --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined-vad.js @@ -0,0 +1,310 @@ +/** + * sherpa-onnx-vad.js + * + * Voice Activity Detection functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing VAD namespace + SherpaOnnx.VAD = SherpaOnnx.VAD || {}; + + // Internal class for voice activity detection + class VoiceActivityDetector { + constructor(handle, Module) { + this.handle = handle; + this.Module = Module; + } + + /** + * Accept audio waveform data + * @param {Float32Array} samples - Audio samples in [-1, 1] range + */ + acceptWaveform(samples) { + const pointer = this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT); + + this.Module._SherpaOnnxVoiceActivityDetectorAcceptWaveform( + this.handle, pointer, samples.length + ); + + this.Module._free(pointer); + } + + /** + * Check if there are no speech segments available + * @returns {boolean} - True if no segments available, false otherwise + */ + isEmpty() { + return this.Module._SherpaOnnxVoiceActivityDetectorEmpty(this.handle) === 1; + } + + /** + * Check if voice is detected + * @returns {boolean} - True if voice detected, false otherwise + */ + detected() { + return this.Module._SherpaOnnxVoiceActivityDetectorDetected(this.handle) === 1; + } + + /** + * Reset the detector + */ + reset() { + this.Module._SherpaOnnxVoiceActivityDetectorReset(this.handle); + } + + /** + * Free the detector + */ + free() { + if (this.handle) { + this.Module._SherpaOnnxDestroyVoiceActivityDetector(this.handle); + this.handle = 0; + } + } + } + + // Define the VAD module functionality + SherpaOnnx.VAD = { + /** + * Load a Voice Activity Detection model + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const debug = modelConfig.debug || false; + const 
modelDir = modelConfig.modelDir || 'vad-models'; + + // First check for preloaded assets + if (!modelConfig.forceDownload) { + const assetPath = SherpaOnnx.Config.assetPaths.vad; + if (debug) console.log(`Checking for preloaded VAD assets at ${assetPath}`); + + if (SherpaOnnx.FileSystem.fileExists(assetPath)) { + const files = SherpaOnnx.FileSystem.listFiles(assetPath); + if (debug) console.log(`Found preloaded files: ${files.join(', ')}`); + + // Check for required model file + if (files.includes('silero_vad.onnx')) { + if (debug) console.log("Using preloaded VAD model"); + return { + modelDir: assetPath, + actualPaths: { + model: `${assetPath}/silero_vad.onnx` + }, + preloaded: true + }; + } + + if (debug) console.log("Preloaded VAD assets found but missing required files"); + } + } + + // Create directory if it doesn't exist + try { + SherpaOnnx.FileSystem.ensureDirectory(modelDir); + } catch(e) { + console.error(`Failed to create directory ${modelDir}:`, e); + } + + // Collection for actual file paths + const actualPaths = {}; + + // Load VAD model file + const result = await SherpaOnnx.FileSystem.loadFile( + modelConfig.model || 'assets/vad/silero_vad.onnx', + `${modelDir}/silero_vad.onnx`, + debug + ); + + // Collect actual path + actualPaths.model = result.path; + + return { + modelDir, + actualPaths + }; + }, + + /** + * Create a Voice Activity Detector with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {VoiceActivityDetector} - A VAD instance + */ + createVoiceActivityDetector: function(loadedModel, options = {}) { + const debug = options.debug !== false; + + try { + // Get the model path from loaded model info + const modelPath = loadedModel.modelPath || `${loadedModel.modelDir}/${loadedModel.fileName || 'silero_vad.onnx'}`; + + if (debug) console.log(`VAD.createVoiceActivityDetector: Using model at ${modelPath}`); + + // Verify model file exists before proceeding + if (!SherpaOnnx.FileSystem.fileExists(modelPath)) { + throw new Error(`Model file not found at ${modelPath}`); + } + + // Initialize the silero VAD config + const sileroVadConfig = this._initSileroVadConfig({ + model: modelPath, + threshold: options.threshold || 0.5, + minSilenceDuration: options.minSilenceDuration || 0.3, + minSpeechDuration: options.minSpeechDuration || 0.1, + windowSize: options.windowSize || 512, + maxSpeechDuration: options.maxSpeechDuration || 30.0, + }, global.Module); + + // Initialize the full VAD config + const vadConfig = this._initVadModelConfig({ + sileroVad: { + model: modelPath, + threshold: options.threshold || 0.5, + minSilenceDuration: options.minSilenceDuration || 0.3, + minSpeechDuration: options.minSpeechDuration || 0.1, + windowSize: options.windowSize || 512, + maxSpeechDuration: options.maxSpeechDuration || 30.0, + }, + sampleRate: options.sampleRate || 16000, + numThreads: options.numThreads || 1, + provider: options.provider || 'cpu', + debug: debug ? 
1 : 0, + }, global.Module); + + // Debug print the config if requested + if (debug) { + try { + global.Module._MyPrintVAD(vadConfig.ptr); + } catch (printErr) { + console.warn("Could not print VAD config:", printErr); + } + } + + // Create the detector + if (debug) console.log("VAD.createVoiceActivityDetector: Creating detector"); + const vadPtr = global.Module.ccall( + 'SherpaOnnxCreateVoiceActivityDetector', + 'number', + ['number', 'number'], + [vadConfig.ptr, options.bufferSizeInSeconds || 5.0] + ); + + if (!vadPtr) { + throw new Error("Failed to create voice activity detector"); + } + + if (debug) console.log("VAD.createVoiceActivityDetector: Detector created successfully"); + + // Free configuration memory + SherpaOnnx.Utils.freeConfig(vadConfig, global.Module); + + // Create the detector object + const detector = new VoiceActivityDetector(vadPtr, global.Module); + + // Track the resource for cleanup if tracking function is available + if (SherpaOnnx.trackResource) { + SherpaOnnx.trackResource('vad', detector); + } + + return detector; + } catch (error) { + console.error("Error creating VAD detector:", error); + throw error; + } + }, + + /** + * Initialize SileroVad configuration in WASM + * @param {Object} config - SileroVad configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} Configuration with WASM pointers + * @private + */ + _initSileroVadConfig: function(config, Module) { + const modelString = SherpaOnnx.Utils.allocateString(config.model, Module); + + const len = 6 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.setValue(ptr, modelString.ptr, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.threshold || 0.5, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.minSilenceDuration || 0.3, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.minSpeechDuration || 0.1, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.windowSize || 512, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.maxSpeechDuration || 30.0, 'float'); + offset += 4; + + return { + buffer: modelString.ptr, + ptr: ptr, + len: len + }; + }, + + /** + * Initialize VAD model configuration in WASM + * @param {Object} config - VAD configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} Configuration with WASM pointers + * @private + */ + _initVadModelConfig: function(config, Module) { + if (!('sileroVad' in config)) { + throw new Error("Missing sileroVad configuration"); + } + + const sileroVad = this._initSileroVadConfig(config.sileroVad, Module); + + const providerString = SherpaOnnx.Utils.allocateString(config.provider || 'cpu', Module); + + const len = sileroVad.len + 4 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(sileroVad.ptr, sileroVad.len, ptr + offset); + offset += sileroVad.len; + + Module.setValue(ptr + offset, config.sampleRate || 16000, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, providerString.ptr, 'i8*'); // provider + offset += 4; + + Module.setValue(ptr + offset, config.debug !== undefined ? config.debug : 1, 'i32'); + offset += 4; + + return { + buffer: providerString.ptr, + ptr: ptr, + len: len, + sileroVad: sileroVad + }; + } + }; + +})(typeof window !== 'undefined' ? 
window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-combined.js b/wasm/combined/sherpa-onnx-combined.js new file mode 100644 index 0000000000..283abbc941 --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined.js @@ -0,0 +1,312 @@ +/** + * sherpa-onnx-combined.js + * + * Loader for all Sherpa-ONNX modules + */ + +(function(global) { + // Auto-detect script path to handle loading from different directories + function getScriptPath() { + // For browser environments + if (typeof document !== 'undefined') { + const scripts = document.getElementsByTagName('script'); + for (let i = 0; i < scripts.length; i++) { + const src = scripts[i].src; + if (src.indexOf('sherpa-onnx-combined.js') !== -1) { + // Return the directory path of the script + return src.substring(0, src.lastIndexOf('/') + 1); + } + } + } + // Default path if we can't detect + return ''; + } + + // Get the base path where all JS modules are located + const basePath = getScriptPath(); + console.log("Detected script base path:", basePath); + + // Define module paths relative to the base path + const defaultModules = [ + 'sherpa-onnx-combined-core.js', + 'sherpa-onnx-combined-vad.js', + 'sherpa-onnx-combined-asr.js', + 'sherpa-onnx-combined-tts.js', + 'sherpa-onnx-combined-speaker.js', + 'sherpa-onnx-combined-enhancement.js', + 'sherpa-onnx-combined-kws.js' + ]; + + // Use custom module paths if provided, otherwise use defaults with base path + let modulePaths; + if (typeof window !== 'undefined' && window.sherpaOnnxModulePaths) { + console.log("Using custom module paths from window.sherpaOnnxModulePaths"); + modulePaths = window.sherpaOnnxModulePaths; + } else if (global.sherpaOnnxModulePaths) { + console.log("Using custom module paths from global.sherpaOnnxModulePaths"); + modulePaths = global.sherpaOnnxModulePaths; + } else { + // Apply base path to each module + modulePaths = defaultModules.map(module => basePath + module); + console.log("Using default module paths with detected base path:", modulePaths); + } + + // Keep track of loaded modules + let loadedModules = {}; + let modulesLoading = false; + + // Keep track of active resources to clean up + let activeResources = { + asr: [], + tts: [], + vad: [], + speaker: [], + enhancement: [], + kws: [] + }; + + // Async loader for scripts + const loadScript = function(url) { + return new Promise((resolve, reject) => { + const script = document.createElement('script'); + script.src = url; + script.async = true; + + script.onload = () => { + console.log(`Module ${url} loaded successfully`); + loadedModules[url] = true; + resolve(); + }; + + script.onerror = (e) => { + console.error(`Failed to load script: ${url}`, e); + loadedModules[url] = false; + // Continue loading other modules even if one fails + resolve(); + }; + + document.head.appendChild(script); + }); + }; + + // Check if core module is available + const ensureCoreModule = function() { + if (!global.SherpaOnnx) { + console.error("SherpaOnnx core module not loaded! 
Other modules will not function properly."); + return false; + } + return true; + }; + + // Load modules in sequence to ensure proper initialization + const loadModulesSequentially = async function() { + if (modulesLoading) return; + + modulesLoading = true; + + try { + // Load core module first since other modules depend on it + console.log("Loading SherpaOnnx core module from: " + modulePaths[0]); + await loadScript(modulePaths[0]); // Use the first module from the paths array + + if (!ensureCoreModule()) { + throw new Error("Failed to load core module"); + } + + // Load the rest of the modules sequentially + for (let i = 1; i < modulePaths.length; i++) { + console.log(`Loading module ${i+1}/${modulePaths.length}: ${modulePaths[i]}`); + await loadScript(modulePaths[i]); + } + + // Check if all critical modules are loaded + let allLoaded = true; + let missingModules = []; + + for (const module of modulePaths) { + if (!loadedModules[module]) { + allLoaded = false; + missingModules.push(module); + } + } + + if (!allLoaded) { + console.warn(`Not all modules loaded successfully. Missing: ${missingModules.join(', ')}`); + } else { + console.log("All SherpaOnnx modules loaded successfully"); + } + + // Add resource tracking and cleanup methods after modules are loaded + if (global.SherpaOnnx) { + // Add resource tracking methods + global.SherpaOnnx.trackResource = function(type, resource) { + if (activeResources[type]) { + activeResources[type].push(resource); + } + return resource; + }; + + // Add cleanup methods + global.SherpaOnnx.cleanup = function(type) { + if (!type) { + // Clean up all resource types if no specific type is provided + Object.keys(activeResources).forEach(t => this.cleanup(t)); + return; + } + + if (activeResources[type]) { + const resources = activeResources[type]; + console.log(`Cleaning up ${resources.length} ${type} resources`); + + for (let i = resources.length - 1; i >= 0; i--) { + try { + if (resources[i] && typeof resources[i].free === 'function') { + resources[i].free(); + } + resources.splice(i, 1); + } catch (e) { + console.error(`Error cleaning up ${type} resource:`, e); + } + } + } + }; + + // Add convenience methods for each resource type + global.SherpaOnnx.cleanupASR = function() { this.cleanup('asr'); }; + global.SherpaOnnx.cleanupTTS = function() { this.cleanup('tts'); }; + global.SherpaOnnx.cleanupVAD = function() { this.cleanup('vad'); }; + global.SherpaOnnx.cleanupSpeaker = function() { this.cleanup('speaker'); }; + global.SherpaOnnx.cleanupEnhancement = function() { this.cleanup('enhancement'); }; + global.SherpaOnnx.cleanupKWS = function() { this.cleanup('kws'); }; + } + + // Call ready callback if defined + if (global.onSherpaOnnxReady) { + console.log("Calling onSherpaOnnxReady callback"); + global.onSherpaOnnxReady(allLoaded, missingModules); + } + } catch (error) { + console.error("Error during module loading:", error); + + if (global.onSherpaOnnxReady) { + global.onSherpaOnnxReady(false, error); + } + } finally { + modulesLoading = false; + } + }; + + // Main initialization function + const initialize = function() { + console.log("initialize() function called. Starting module loading process."); + // Browser environment: load scripts + if (typeof window !== 'undefined') { + console.log("Browser environment detected. 
Proceeding to load modules sequentially."); + // Load modules sequentially and handle completion/errors + loadModulesSequentially() + .catch(error => { + console.error("Module loading failed:", error); + // Ensure the callback is still called on failure, passing the error + if (global.onSherpaOnnxReady) { + console.log("Calling onSherpaOnnxReady with failure status due to error."); + // Determine if any modules loaded successfully before the error + let anyLoaded = Object.values(loadedModules).some(status => status === true); + let missingModules = modulePaths.filter(path => !loadedModules[path]); + global.onSherpaOnnxReady(anyLoaded && missingModules.length < modulePaths.length, error || missingModules); + } + }); + } else { + console.log("Non-browser environment detected. Skipping module loading."); + } + }; + + // Check if WASM module is already loaded + if (typeof global.Module !== 'undefined' && typeof global.Module.onRuntimeInitialized !== 'undefined') { + const originalOnRuntimeInitialized = global.Module.onRuntimeInitialized; + + global.Module.onRuntimeInitialized = function() { + console.log("WASM module runtime initialized, checking for full initialization including HEAPF32..."); + + if (originalOnRuntimeInitialized) { + originalOnRuntimeInitialized(); + } + + // Wait for full initialization including HEAPF32 + let attempt = 0; + const checkHeapInterval = setInterval(() => { + attempt++; + console.log(`Attempt ${attempt}: Checking if HEAPF32 is available...`); + console.log(`global.Module.HEAPF32 exists: ${!!global.Module.HEAPF32}`); + if (global.Module.HEAPF32) { + console.log("HEAPF32 is available. Proceeding with JavaScript module initialization."); + clearInterval(checkHeapInterval); + initialize(); + } else if (attempt > 120) { // Wait up to 60 seconds (120 * 500ms) + console.error("HEAPF32 not available after 60 seconds. Proceeding anyway with potential issues."); + clearInterval(checkHeapInterval); + initialize(); + } + }, 500); + }; + } else { + // No WASM module yet, set up a listener + global.onModuleReady = function() { + console.log("WASM module ready, proceeding with module initialization"); + // Ensure HEAPF32 is available before proceeding + if (global.Module && global.Module.HEAPF32) { + console.log("HEAPF32 confirmed available via onModuleReady."); + initialize(); + } else { + console.error("onModuleReady called but HEAPF32 not available. Waiting for initialization."); + let attempt = 0; + const readyCheckInterval = setInterval(() => { + attempt++; + console.log(`Ready check attempt ${attempt}: Waiting for HEAPF32...`); + if (global.Module && global.Module.HEAPF32) { + console.log("HEAPF32 now available. Proceeding with initialization."); + clearInterval(readyCheckInterval); + initialize(); + } else if (attempt > 120) { + console.error("HEAPF32 still not available after 60 seconds in onModuleReady. Proceeding with risk."); + clearInterval(readyCheckInterval); + initialize(); + } + }, 500); + } + }; + + // Since HEAPF32 availability was logged, check if it's already available and proceed + if (typeof global.Module !== 'undefined' && global.Module.HEAPF32) { + console.log("HEAPF32 already available. 
Triggering initialization immediately.");
+      initialize();
+    } else {
+      console.log("Waiting for WASM module initialization or HEAPF32 availability before loading dependent scripts.");
+      // Force initialization after a short timeout if no response
+      console.log("Checking for HEAPF32 availability immediately for debugging.");
+      if (typeof global.Module !== 'undefined') {
+        console.log("Module exists. Current HEAPF32 status: ", !!global.Module.HEAPF32);
+        if (global.Module.HEAPF32) {
+          console.log("HEAPF32 detected. Proceeding with initialization NOW.");
+          initialize();
+        } else {
+          console.log("No HEAPF32 yet. Waiting a very short period before forcing initialization.");
+          setTimeout(() => {
+            console.log("Immediate timeout reached. Forcing initialization regardless of HEAPF32 status to debug.");
+            if (typeof global.Module !== 'undefined') {
+              console.log("Module status at force: ", !!global.Module, "HEAPF32 status: ", !!global.Module.HEAPF32);
+            } else {
+              console.log("Module still not defined at force time.");
+            }
+            initialize();
+          }, 1000); // Force after just 1 second for faster debugging
+        }
+      } else {
+        console.log("Module not yet defined. Waiting for it to appear.");
+        setTimeout(() => {
+          console.log("Secondary timeout reached. Forcing initialization regardless of status.");
+          initialize();
+        }, 1000); // Force after just 1 second if Module isn't even defined
+      }
+    }
+  }
+})(typeof window !== 'undefined' ? window : global);
\ No newline at end of file
diff --git a/wasm/combined/sherpa-onnx-wasm-combined.cc b/wasm/combined/sherpa-onnx-wasm-combined.cc
new file mode 100644
index 0000000000..82253ebae2
--- /dev/null
+++ b/wasm/combined/sherpa-onnx-wasm-combined.cc
@@ -0,0 +1,219 @@
+// wasm/combined/sherpa-onnx-wasm-combined.cc
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+#include <stdio.h>
+#include <stdint.h>
+#include <algorithm>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+// This is a combined implementation that provides all the necessary C functions
+// for the WASM module, incorporating debug printing for all supported features.
+
+extern "C" {
+
+// ============================================================================
+// Debug printing functions for all model types
+// ============================================================================
+
+// Helper function to copy between heap locations
+void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
+  std::copy(src, src + num_bytes, dst);
+}
+
+// Debug printing for Online ASR configuration
+void MyPrintOnlineASR(SherpaOnnxOnlineRecognizerConfig *config) {
+  auto model_config = &config->model_config;
+  auto feat = &config->feat_config;
+  auto transducer_model_config = &model_config->transducer;
+  auto paraformer_model_config = &model_config->paraformer;
+  auto ctc_model_config = &model_config->zipformer2_ctc;
+
+  fprintf(stdout, "----------Online ASR Configuration----------\n");
+  fprintf(stdout, "----------online transducer model config----------\n");
+  fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder);
+  fprintf(stdout, "decoder: %s\n", transducer_model_config->decoder);
+  fprintf(stdout, "joiner: %s\n", transducer_model_config->joiner);
+
+  fprintf(stdout, "----------online paraformer model config----------\n");
+  fprintf(stdout, "encoder: %s\n", paraformer_model_config->encoder);
+  fprintf(stdout, "decoder: %s\n", paraformer_model_config->decoder);
+
+  fprintf(stdout, "----------online ctc model config----------\n");
+  fprintf(stdout, "model: %s\n", ctc_model_config->model);
+  fprintf(stdout, "tokens: %s\n", model_config->tokens);
+  fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
+  fprintf(stdout, "provider: %s\n", model_config->provider);
+  fprintf(stdout, "debug: %d\n", model_config->debug);
+  fprintf(stdout, "model type: %s\n", model_config->model_type);
+  fprintf(stdout, "modeling unit: %s\n", model_config->modeling_unit);
+  fprintf(stdout, "bpe vocab: %s\n", model_config->bpe_vocab);
+  fprintf(stdout, "tokens_buf: %s\n",
+          model_config->tokens_buf ? 
model_config->tokens_buf : ""); + fprintf(stdout, "tokens_buf_size: %d\n", model_config->tokens_buf_size); + + fprintf(stdout, "----------feat config----------\n"); + fprintf(stdout, "sample rate: %d\n", feat->sample_rate); + fprintf(stdout, "feat dim: %d\n", feat->feature_dim); + + fprintf(stdout, "----------recognizer config----------\n"); + fprintf(stdout, "decoding method: %s\n", config->decoding_method); + fprintf(stdout, "max active paths: %d\n", config->max_active_paths); + fprintf(stdout, "enable_endpoint: %d\n", config->enable_endpoint); + fprintf(stdout, "rule1_min_trailing_silence: %.2f\n", + config->rule1_min_trailing_silence); + fprintf(stdout, "rule2_min_trailing_silence: %.2f\n", + config->rule2_min_trailing_silence); + fprintf(stdout, "rule3_min_utterance_length: %.2f\n", + config->rule3_min_utterance_length); + fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file); + fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score); + fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts); + fprintf(stdout, "rule_fars: %s\n", config->rule_fars); + fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty); + + fprintf(stdout, "----------ctc fst decoder config----------\n"); + fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph); + fprintf(stdout, "max_active: %d\n", + config->ctc_fst_decoder_config.max_active); +} + +// Debug printing for VAD configuration +void MyPrintVAD(SherpaOnnxVadModelConfig *config) { + auto silero_vad = &config->silero_vad; + + fprintf(stdout, "----------Voice Activity Detection Configuration----------\n"); + fprintf(stdout, "----------silero_vad config----------\n"); + fprintf(stdout, "model: %s\n", silero_vad->model); + fprintf(stdout, "threshold: %.3f\n", silero_vad->threshold); + fprintf(stdout, "min_silence_duration: %.3f\n", + silero_vad->min_silence_duration); + fprintf(stdout, "min_speech_duration: %.3f\n", + silero_vad->min_speech_duration); + fprintf(stdout, "window_size: %d\n", silero_vad->window_size); + fprintf(stdout, "max_speech_duration: %.3f\n", + silero_vad->max_speech_duration); + + fprintf(stdout, "----------config----------\n"); + fprintf(stdout, "sample_rate: %d\n", config->sample_rate); + fprintf(stdout, "num_threads: %d\n", config->num_threads); + fprintf(stdout, "provider: %s\n", config->provider); + fprintf(stdout, "debug: %d\n", config->debug); +} + +// Debug printing for TTS configuration +void MyPrintTTS(SherpaOnnxOfflineTtsConfig *tts_config) { + auto tts_model_config = &tts_config->model; + auto vits_model_config = &tts_model_config->vits; + auto matcha_model_config = &tts_model_config->matcha; + auto kokoro = &tts_model_config->kokoro; + + fprintf(stdout, "----------Text-to-Speech Configuration----------\n"); + fprintf(stdout, "----------vits model config----------\n"); + fprintf(stdout, "model: %s\n", vits_model_config->model); + fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon); + fprintf(stdout, "tokens: %s\n", vits_model_config->tokens); + fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir); + fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale); + fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w); + fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale); + fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir); + + fprintf(stdout, "----------matcha model config----------\n"); + fprintf(stdout, "acoustic_model: %s\n", matcha_model_config->acoustic_model); + fprintf(stdout, "vocoder: %s\n", 
matcha_model_config->vocoder); + fprintf(stdout, "lexicon: %s\n", matcha_model_config->lexicon); + fprintf(stdout, "tokens: %s\n", matcha_model_config->tokens); + fprintf(stdout, "data_dir: %s\n", matcha_model_config->data_dir); + fprintf(stdout, "noise scale: %.3f\n", matcha_model_config->noise_scale); + fprintf(stdout, "length scale: %.3f\n", matcha_model_config->length_scale); + fprintf(stdout, "dict_dir: %s\n", matcha_model_config->dict_dir); + + fprintf(stdout, "----------kokoro model config----------\n"); + fprintf(stdout, "model: %s\n", kokoro->model); + fprintf(stdout, "voices: %s\n", kokoro->voices); + fprintf(stdout, "tokens: %s\n", kokoro->tokens); + fprintf(stdout, "data_dir: %s\n", kokoro->data_dir); + fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale); + fprintf(stdout, "dict_dir: %s\n", kokoro->dict_dir); + fprintf(stdout, "lexicon: %s\n", kokoro->lexicon); + + fprintf(stdout, "----------tts model config----------\n"); + fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads); + fprintf(stdout, "debug: %d\n", tts_model_config->debug); + fprintf(stdout, "provider: %s\n", tts_model_config->provider); + + fprintf(stdout, "----------tts config----------\n"); + fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts); + fprintf(stdout, "rule_fars: %s\n", tts_config->rule_fars); + fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences); + fprintf(stdout, "silence scale: %.3f\n", tts_config->silence_scale); +} + +// Debug printing for Speaker Diarization configuration +void MyPrintSpeakerDiarization(const SherpaOnnxOfflineSpeakerDiarizationConfig *sd_config) { + const auto &segmentation = sd_config->segmentation; + const auto &embedding = sd_config->embedding; + const auto &clustering = sd_config->clustering; + + fprintf(stdout, "----------Speaker Diarization Configuration----------\n"); + fprintf(stdout, "----------segmentation config----------\n"); + fprintf(stdout, "pyannote model: %s\n", segmentation.pyannote.model); + fprintf(stdout, "num threads: %d\n", segmentation.num_threads); + fprintf(stdout, "debug: %d\n", segmentation.debug); + fprintf(stdout, "provider: %s\n", segmentation.provider); + + fprintf(stdout, "----------embedding config----------\n"); + fprintf(stdout, "model: %s\n", embedding.model); + fprintf(stdout, "num threads: %d\n", embedding.num_threads); + fprintf(stdout, "debug: %d\n", embedding.debug); + fprintf(stdout, "provider: %s\n", embedding.provider); + + fprintf(stdout, "----------clustering config----------\n"); + fprintf(stdout, "num_clusters: %d\n", clustering.num_clusters); + fprintf(stdout, "threshold: %.3f\n", clustering.threshold); + + fprintf(stdout, "min_duration_on: %.3f\n", sd_config->min_duration_on); + fprintf(stdout, "min_duration_off: %.3f\n", sd_config->min_duration_off); +} + +// Debug printing for Speech Enhancement configuration +void MyPrintSpeechEnhancement(SherpaOnnxOfflineSpeechDenoiserConfig *config) { + auto model = &config->model; + auto gtcrn = &model->gtcrn; + + fprintf(stdout, "----------Speech Enhancement Configuration----------\n"); + fprintf(stdout, "----------offline speech denoiser model config----------\n"); + fprintf(stdout, "gtcrn: %s\n", gtcrn->model); + fprintf(stdout, "num threads: %d\n", model->num_threads); + fprintf(stdout, "debug: %d\n", model->debug); + fprintf(stdout, "provider: %s\n", model->provider); +} + +// Debug printing for Keyword Spotting configuration +void MyPrintKeywordSpotting(SherpaOnnxKeywordSpotterConfig *config) { + auto feat = &config->feat_config; + 
auto model = &config->model_config; + auto transducer = &model->transducer; + + fprintf(stdout, "----------Keyword Spotting Configuration----------\n"); + fprintf(stdout, "model_config.transducer.encoder: %s\n", transducer->encoder); + fprintf(stdout, "model_config.transducer.decoder: %s\n", transducer->decoder); + fprintf(stdout, "model_config.transducer.joiner: %s\n", transducer->joiner); + fprintf(stdout, "model_config.tokens: %s\n", model->tokens); + fprintf(stdout, "model_config.num_threads: %d\n", model->num_threads); + fprintf(stdout, "model_config.provider: %s\n", model->provider); + fprintf(stdout, "model_config.debug: %d\n", model->debug); + + fprintf(stdout, "feat_config.sample_rate: %d\n", feat->sample_rate); + fprintf(stdout, "feat_config.feature_dim: %d\n", feat->feature_dim); + + fprintf(stdout, "max_active_paths: %d\n", config->max_active_paths); + fprintf(stdout, "num_trailing_blanks: %d\n", config->num_trailing_blanks); + fprintf(stdout, "keywords_score: %.3f\n", config->keywords_score); + fprintf(stdout, "keywords_threshold: %.3f\n", config->keywords_threshold); + fprintf(stdout, "keywords_file: %s\n", config->keywords_file ? config->keywords_file : ""); +} + +} // extern "C" \ No newline at end of file
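
For reference, a minimal browser usage sketch of the combined module introduced by this patch (illustrative only, not part of the diff): it assumes the Emscripten-generated sherpa-onnx-wasm-combined.js, the sherpa-onnx-combined*.js loaders above, and a silero_vad.onnx model are served next to the page. The script order, model path, and 16 kHz sample rate are assumptions; only APIs defined in this patch (onSherpaOnnxReady, SherpaOnnx.VAD.loadModel, SherpaOnnx.VAD.createVoiceActivityDetector, SherpaOnnx.cleanupVAD) are used.

<!-- index.html (sketch) -->
<script>
  // Invoked by sherpa-onnx-combined.js after it has loaded all sub-modules.
  window.onSherpaOnnxReady = async function (allLoaded, missing) {
    if (!allLoaded) {
      console.warn('Some SherpaOnnx modules failed to load:', missing);
      return;
    }

    // Copy (or reuse preloaded) VAD assets into the Emscripten file system.
    const loadedModel = await SherpaOnnx.VAD.loadModel({
      model: 'assets/vad/silero_vad.onnx',  // assumed location of the model
      debug: true,
    });

    // Create the detector; option values mirror the defaults in
    // sherpa-onnx-combined-vad.js.
    const vad = SherpaOnnx.VAD.createVoiceActivityDetector(loadedModel, {
      threshold: 0.5,
      sampleRate: 16000,
    });

    // Feed 16 kHz mono samples in [-1, 1]; a real page would get these from
    // an AudioWorklet or ScriptProcessor callback.
    const samples = new Float32Array(512);
    vad.acceptWaveform(samples);
    console.log('speech detected:', vad.detected());

    // Release native resources when finished (or call SherpaOnnx.cleanupVAD()).
    vad.free();
  };
</script>
<!-- The Emscripten runtime must be present before the combined loader runs. -->
<script src="sherpa-onnx-wasm-combined.js"></script>
<script src="sherpa-onnx-combined.js"></script>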