|
| 1 | +// c-api-examples/offline-stt-c-api.c |
| 2 | +// |
| 3 | +// Copyright (c) 2024 Xiaomi Corporation |
| 4 | + |
| 5 | +// We assume you have pre-downloaded the whisper multi-lingual models |
| 6 | +// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models |
| 7 | +// An example command to download the "tiny" whisper model is given below: |
| 8 | +// |
| 9 | +// clang-format off |
| 10 | +// |
| 11 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 |
| 12 | +// tar xvf sherpa-onnx-whisper-tiny.tar.bz2 |
| 13 | +// rm sherpa-onnx-whisper-tiny.tar.bz2 |
| 14 | +// |
| 15 | +// clang-format on |
| 16 | + |
| 17 | +#include <stdio.h> |
| 18 | +#include <stdlib.h> |
| 19 | +#include <string.h> |
| 20 | + |
| 21 | +#include "sherpa-onnx/c-api/c-api.h" |
| 22 | + |
| 23 | +int32_t main() { |
| 24 | + // You can find more test waves from |
| 25 | + // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs |
| 26 | + const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"; |
| 27 | + const char *encoder_filename = "sherpa-onnx-whisper-tiny/tiny-encoder.onnx"; |
| 28 | + const char *decoder_filename = "sherpa-onnx-whisper-tiny/tiny-decoder.onnx"; |
| 29 | + const char *tokens_filename = "sherpa-onnx-whisper-tiny/tiny-tokens.txt"; |
| 30 | + const char *language = "en"; |
| 31 | + const char *provider = "cpu"; |
| 32 | + |
| 33 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); |
| 34 | + if (wave == NULL) { |
| 35 | + fprintf(stderr, "Failed to read %s\n", wav_filename); |
| 36 | + return -1; |
| 37 | + } |
| 38 | + |
| 39 | + // Whisper config |
| 40 | + SherpaOnnxOfflineWhisperModelConfig whisper_config; |
| 41 | + memset(&whisper_config, 0, sizeof(whisper_config)); |
| 42 | + whisper_config.decoder = decoder_filename; |
| 43 | + whisper_config.encoder = encoder_filename; |
| 44 | + whisper_config.language = language; |
| 45 | + whisper_config.tail_paddings = 0; |
| 46 | + whisper_config.task = "transcribe"; |
| 47 | + |
| 48 | + // Offline model config |
| 49 | + SherpaOnnxOfflineModelConfig offline_model_config; |
| 50 | + memset(&offline_model_config, 0, sizeof(offline_model_config)); |
| 51 | + offline_model_config.bpe_vocab = ""; |
| 52 | + offline_model_config.debug = 1; |
| 53 | + offline_model_config.num_threads = 1; |
| 54 | + offline_model_config.provider = provider; |
| 55 | + offline_model_config.tokens = tokens_filename; |
| 56 | + offline_model_config.whisper = whisper_config; |
| 57 | + offline_model_config.sense_voice = |
| 58 | + (SherpaOnnxOfflineSenseVoiceModelConfig){"", "", 0}; |
| 59 | + |
| 60 | + // Recognizer config |
| 61 | + SherpaOnnxOfflineRecognizerConfig recognizer_config; |
| 62 | + memset(&recognizer_config, 0, sizeof(recognizer_config)); |
| 63 | + recognizer_config.decoding_method = "greedy_search"; |
| 64 | + recognizer_config.feat_config = (SherpaOnnxFeatureConfig){16000, 512}; |
| 65 | + recognizer_config.model_config = offline_model_config; |
| 66 | + |
| 67 | + SherpaOnnxOfflineRecognizer *recognizer = |
| 68 | + CreateOfflineRecognizer(&recognizer_config); |
| 69 | + |
| 70 | + SherpaOnnxOfflineStream *stream = CreateOfflineStream(recognizer); |
| 71 | + |
| 72 | + AcceptWaveformOffline(stream, wave->sample_rate, wave->samples, |
| 73 | + wave->num_samples); |
| 74 | + DecodeOfflineStream(recognizer, stream); |
| 75 | + SherpaOnnxOfflineRecognizerResult *result = GetOfflineStreamResult(stream); |
| 76 | + |
| 77 | + fprintf(stderr, "Decoded text: %s\n", result->text); |
| 78 | + |
| 79 | + DestroyOfflineRecognizerResult(result); |
| 80 | + DestroyOfflineStream(stream); |
| 81 | + DestroyOfflineRecognizer(recognizer); |
| 82 | + SherpaOnnxFreeWave(wave); |
| 83 | + |
| 84 | + return 0; |
| 85 | +} |
0 commit comments