diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp
index 54f73110d42..d96c1531a8e 100644
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@@ -11,6 +11,9 @@ struct whisper_params {
     int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat
 
     std::string model = "models/ggml-base.en.bin";
+    std::string coreml_dir = "";
+    std::string openvino_dir = "";
+    bool disable_coreml = false;
 
     bool use_gpu    = true;
     bool flash_attn = false;
@@ -28,7 +31,10 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
         }
         else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
         else if (arg == "-m" || arg == "--model")   { params.model     = argv[++i]; }
+        else if (arg == "-l" || arg == "--coreml")  { params.coreml_dir = argv[++i]; }
+        else if (arg == "-v" || arg == "--openvino"){ params.openvino_dir = argv[++i]; }
         else if (arg == "-w" || arg == "--what")    { params.what      = atoi(argv[++i]); }
+        else if (arg == "-ml" || arg == "--disable-ml") { params.disable_coreml = true; }
         else if (arg == "-ng" || arg == "--no-gpu")     { params.use_gpu    = false; }
         else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; }
         else {
@@ -55,17 +61,31 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
     fprintf(stderr, "  %-7s 2 - ggml_mul_mat\n", "");
     fprintf(stderr, "  -ng,      --no-gpu        [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
     fprintf(stderr, "  -fa,      --flash-attn    [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
+    fprintf(stderr, "  -ml,      --disable-ml    [%-7s] disable CoreML\n", params.disable_coreml ? "true" : "false");
+    fprintf(stderr, "  -l        --coreml        [%-7s] Set CoreML Directory\n", params.coreml_dir.c_str());
+    fprintf(stderr, "  -v        --openvino      [%-7s] Set OpenVINO Directory\n", params.openvino_dir.c_str());
     fprintf(stderr, "\n");
 }
 
+// Duplicate a std::string into a heap buffer for the char* fields of
+// whisper_context_params. Caller owns the result and must free() it.
+static char * string_to_ptr(const std::string & s) {
+    return strdup(s.c_str());
+}
+
 static int whisper_bench_full(const whisper_params & params) {
     // whisper init
 
     struct whisper_context_params cparams = whisper_context_default_params();
+    char * sptr1 = nullptr;
+    char * sptr2 = nullptr;
 
     cparams.use_gpu    = params.use_gpu;
     cparams.flash_attn = params.flash_attn;
-
+    // NOTE: coreml_dir feeds path_coreml and openvino_dir feeds path_openvino
+    // (the previous revision of this patch had the two assignments crossed).
+    sptr1 = string_to_ptr(params.coreml_dir);
+    cparams.path_coreml = sptr1;
+    sptr2 = string_to_ptr(params.openvino_dir);
+    cparams.path_openvino = sptr2;
+    cparams.disable_coreml = params.disable_coreml;
+
     struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
 
     {
@@ -138,6 +158,8 @@ static int whisper_bench_full(const whisper_params & params) {
     }
 
     whisper_print_timings(ctx);
+    free(sptr2);
+    free(sptr1);
     whisper_free(ctx);
 
     fprintf(stderr, "\n");
diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp
index 7d5d0ffea35..931c29efbfc 100644
--- a/examples/cli/cli.cpp
+++ b/examples/cli/cli.cpp
@@ -90,6 +90,10 @@ struct whisper_params {
     // A regular expression that matches tokens to suppress
     std::string suppress_regex;
 
+    std::string openvino_directory = "";
+    std::string coreml_directory = "";
+    bool disable_coreml = false;
+
     std::string openvino_encode_device = "CPU";
 
     std::string dtw = "";
@@ -186,6 +190,9 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
         else if (                  arg == "--prompt")         { params.prompt          = ARGV_NEXT; }
         else if (arg == "-m"    || arg == "--model")          { params.model           = ARGV_NEXT; }
         else if (arg == "-f"    || arg == "--file")           { params.fname_inp.emplace_back(ARGV_NEXT); }
+        else if (arg == "-dov"  || arg == "--ov-directory")   { params.openvino_directory = ARGV_NEXT; }
+        else if (arg == "-docml"|| arg == "--ocml-directory") { params.coreml_directory = ARGV_NEXT; }
+        else if (arg == "-noml" || arg == "--disable-coreml") { params.disable_coreml = true; }
         else if (arg == "-oved" || arg == "--ov-e-device")    { params.openvino_encode_device = ARGV_NEXT; }
         else if (arg == "-dtw"  || arg == "--dtw")            { params.dtw             = ARGV_NEXT; }
         else if (arg == "-ls"   || arg == "--log-score")      { params.log_score       = true; }
@@ -265,6 +272,8 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
     fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                      params.model.c_str());
     fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input audio file path\n",                           "");
     fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n",   params.openvino_encode_device.c_str());
+    fprintf(stderr, "  -dov DN,   --ov-directory DN   [%-7s] the OpenVINO directory path\n",                     params.openvino_directory.c_str());
+    fprintf(stderr, "  -noml,     --disable-coreml    [%-7s] Disable CoreML\n",                                  params.disable_coreml ? "true" : "false");
     fprintf(stderr, "  -dtw MODEL --dtw MODEL         [%-7s] compute token-level timestamps\n",                  params.dtw.c_str());
     fprintf(stderr, "  -ls,       --log-score         [%-7s] log best decoder scores of tokens\n",               params.log_score?"true":"false");
     fprintf(stderr, "  -ng,       --no-gpu            [%-7s] disable GPU\n",                                     params.use_gpu ? "false" : "true");
@@ -883,6 +892,10 @@ static void output_lrc(struct whisper_context * ctx, std::ofstream & fout, const
     }
 }
 
+// Duplicate a std::string into a heap buffer for the char* fields of
+// whisper_context_params. Caller owns the result and must free() it.
+static char * string_to_ptr(const std::string & s) {
+    return strdup(s.c_str());
+}
+
 static void cb_log_disable(enum ggml_log_level , const char * , void * ) { }
 
 int main(int argc, char ** argv) {
@@ -894,6 +907,8 @@ int main(int argc, char ** argv) {
 #endif
 
     whisper_params params;
+    char * sptr1 = nullptr;
+    char * sptr2 = nullptr;
 
     // If the only argument starts with "@", read arguments line-by-line
     // from the given file.
@@ -970,7 +985,12 @@ int main(int argc, char ** argv) {
 
     cparams.use_gpu    = params.use_gpu;
     cparams.flash_attn = params.flash_attn;
-
+    sptr1 = string_to_ptr(params.coreml_directory);
+    cparams.path_coreml = sptr1;
+    sptr2 = string_to_ptr(params.openvino_directory);
+    cparams.path_openvino = sptr2;
+    cparams.disable_coreml = params.disable_coreml;
+
     if (!params.dtw.empty()) {
         cparams.dtw_token_timestamps = true;
         cparams.dtw_aheads_preset = WHISPER_AHEADS_NONE;
@@ -1263,6 +1283,9 @@ int main(int argc, char ** argv) {
     if (!params.no_prints) {
         whisper_print_timings(ctx);
     }
+    free(sptr2);
+    free(sptr1);
+
     whisper_free(ctx);
 
     return 0;
diff --git a/include/whisper.h b/include/whisper.h
index 4aeda98f334..aa9740b9c00 100644
--- a/include/whisper.h
+++ b/include/whisper.h
@@ -125,6 +125,12 @@ extern "C" {
         int dtw_n_top;
         struct whisper_aheads dtw_aheads;
 
+        // Allow coreml + openvino files to have their own directories
+        // Purposely not ifdef'd; may be NULL (treated as "not set")
+        char * path_coreml;
+        char * path_openvino;
+        bool disable_coreml;
+
         size_t dtw_mem_size; // TODO: remove
     };
 
diff --git a/src/whisper.cpp b/src/whisper.cpp
index ad4e7a12d71..271ca146ef7 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <sys/stat.h> // For data_file_exists
 
 #if defined(WHISPER_BIG_ENDIAN)
 template
@@ -3341,9 +3342,50 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
 //
 // interface implementation
 //
+// Returns true if `filename` exists (file or directory).
+static bool data_file_exists (const char * filename) {
+    struct stat buffer;
+    return (stat (filename, &buffer) == 0);
+}
+
+// Re-root the file name of `path_bin` into `replacement_path`.
+// If `must_exist` is set, both the replacement directory and the resulting
+// path must exist, otherwise `path_bin` is returned unchanged.
+static std::string replace_extra_data_directory(std::string path_bin, std::string replacement_path, bool must_exist = true) {
+    std::string new_path = replacement_path;
+    std::string file_part = path_bin;
+
+    // Check replacement_path actually exists
+    if(must_exist && !data_file_exists(new_path.c_str())) {
+        fprintf(stderr, "Trying to replace with non-existent path %s returning passed path %s\n", replacement_path.c_str(), path_bin.c_str());
+        return path_bin;
+    }
+
+    // Win 10/11 accepts both slashes while Linux/Mac only uses /
+    auto pos = file_part.find_last_of("/\\");
+    if (pos != std::string::npos) {
+        file_part = file_part.substr(pos + 1, std::string::npos);
+    }
+
+    // Append a trailing separator unless the path already ends with one.
+    // Checking the last character also handles replacement paths that
+    // contain no separator at all (e.g. "mydir").
+    if (!new_path.empty() && new_path.back() != '/' && new_path.back() != '\\') {
+    #ifdef _WIN32
+        new_path = new_path + "\\";
+    #else
+        new_path = new_path + "/";
+    #endif
+    }
+
+    new_path = new_path + file_part;
+
+    if(must_exist && !data_file_exists(new_path.c_str())) {
+        fprintf(stderr, "Error replacing path %s returning passed path %s\n", replacement_path.c_str(), path_bin.c_str());
+        return path_bin;
+    }
+
+    return new_path;
+}
+
 #ifdef WHISPER_USE_COREML
 // replace .bin with -encoder.mlmodelc
-static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
+static std::string whisper_get_coreml_path_encoder(std::string path_bin, std::string path_coreml) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
@@ -3360,13 +3402,18 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 
     path_bin += "-encoder.mlmodelc";
 
+    if(!path_coreml.empty()) {
+        path_bin = replace_extra_data_directory(path_bin, path_coreml);
+        fprintf(stderr, "Replacement CoreML path %s\n", path_bin.c_str());
+    }
+
     return path_bin;
 }
 #endif
 
 #ifdef WHISPER_USE_OPENVINO
 // replace .bin with-encoder-openvino.xml
-static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
+static std::string whisper_openvino_get_path_encoder(std::string path_bin, std::string path_openvino) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
         path_bin = path_bin.substr(0, pos);
@@ -3374,10 +3421,15 @@ static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
 
     path_bin += "-encoder-openvino.xml";
 
+    if(!path_openvino.empty()) {
+        path_bin = replace_extra_data_directory(path_bin, path_openvino);
+        fprintf(stderr, "Replacement OpenVINO path %s\n", path_bin.c_str());
+    }
+
     return path_bin;
 }
 
-static std::string whisper_openvino_get_path_cache(std::string path_bin) {
+static std::string whisper_openvino_get_path_cache(std::string path_bin, std::string path_openvino) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
         path_bin = path_bin.substr(0, pos);
@@ -3385,6 +3437,12 @@ static std::string whisper_openvino_get_path_cache(std::string path_bin) {
 
     path_bin += "-encoder-openvino-cache";
 
+    if(!path_openvino.empty()) {
+        // This path doesn't have to exist as it may be created
+        path_bin = replace_extra_data_directory(path_bin, path_openvino, false);
+        fprintf(stderr, "Replacement OpenVINO cache path %s\n", path_bin.c_str());
+    }
+
     return path_bin;
 }
 #endif
@@ -3456,20 +3514,22 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     }
 
 #ifdef WHISPER_USE_COREML
-    const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model);
+    if(!ctx->params.disable_coreml) {
+        // params.path_coreml may be NULL (default params) - never feed NULL to std::string
+        const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model, ctx->params.path_coreml ? ctx->params.path_coreml : "");
 
-    WHISPER_LOG_INFO("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
-    WHISPER_LOG_INFO("%s: first run on a device may take a while ...\n", __func__);
+        WHISPER_LOG_INFO("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
+        WHISPER_LOG_INFO("%s: first run on a device may take a while ...\n", __func__);
 
-    state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
-    if (!state->ctx_coreml) {
-        WHISPER_LOG_ERROR("%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
+        state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
+        if (!state->ctx_coreml) {
+            WHISPER_LOG_ERROR("%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
 #ifndef WHISPER_COREML_ALLOW_FALLBACK
-        whisper_free_state(state);
-        return nullptr;
+            whisper_free_state(state);
+            return nullptr;
 #endif
-    } else {
-        WHISPER_LOG_INFO("%s: Core ML model loaded\n", __func__);
+        } else {
+            WHISPER_LOG_INFO("%s: Core ML model loaded\n", __func__);
+        }
     }
 #endif
 
@@ -3585,17 +3645,17 @@ int whisper_ctx_init_openvino_encoder_with_state(
     std::string path_encoder;
     if (!model_path) {
         //if model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
-        path_encoder = whisper_openvino_get_path_encoder(ctx->path_model);
+        path_encoder = whisper_openvino_get_path_encoder(ctx->path_model, ctx->params.path_openvino ? ctx->params.path_openvino : "");
     } else {
-        path_encoder = model_path;
+        path_encoder = ctx->params.path_openvino ? replace_extra_data_directory(model_path, ctx->params.path_openvino) : std::string(model_path);
     }
 
     std::string path_cache;
     if (!cache_dir) {
         //if cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
-        path_cache = whisper_openvino_get_path_cache(ctx->path_model);
+        path_cache = whisper_openvino_get_path_cache(ctx->path_model, ctx->params.path_openvino ? ctx->params.path_openvino : "");
     } else {
-        path_cache = cache_dir;
+        path_cache = ctx->params.path_openvino ? replace_extra_data_directory(cache_dir, ctx->params.path_openvino, false) : std::string(cache_dir);
     }
 
     WHISPER_LOG_INFO("%s: loading OpenVINO model from '%s'\n", __func__, path_encoder.c_str());
@@ -3634,6 +3694,9 @@ struct whisper_context_params whisper_context_default_params() {
             /*.n_heads =*/ 0,
             /*.heads   =*/ NULL,
         },
+        /*.path_coreml    =*/ nullptr,
+        /*.path_openvino  =*/ nullptr,
+        /*.disable_coreml =*/ false,
         /*.dtw_mem_size =*/ 1024*1024*128,
     };
     return result;