Skip to content

Commit d27ef03

Browse files
committed
Disable CoreML
1 parent f890560 commit d27ef03

File tree

4 files changed

+134
-19
lines changed

4 files changed

+134
-19
lines changed

examples/bench/bench.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ struct whisper_params {
1111
int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat
1212

1313
std::string model = "models/ggml-base.en.bin";
14+
std::string coreml_dir = "";
15+
std::string openvino_dir = "";
16+
bool disable_coreml = false;
1417

1518
bool use_gpu = true;
1619
bool flash_attn = false;
@@ -28,7 +31,10 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
2831
}
2932
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
3033
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
34+
else if (arg == "-l" || arg == "--coreml") { params.coreml_dir = argv[++i]; }
35+
else if (arg == "-v" || arg == "--openvino") { params.openvino_dir = argv[++i]; }
3136
else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
37+
else if (arg == "-ml" || arg == "--disable-coreml") { params.disable_coreml = true; }
3238
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
3339
else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; }
3440
else {
@@ -55,17 +61,27 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
5561
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
5662
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
5763
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
64+
fprintf(stderr, " -ml, --disable-coreml disable CoreML\n");
65+
fprintf(stderr, " -l --coreml Set CoreML Directory\n");
66+
fprintf(stderr, " -v --openvino Set OpenVINO Directory\n");
5867
fprintf(stderr, "\n");
5968
}
6069

70+
// Returns a heap-allocated, NUL-terminated copy of `s` created with strdup().
// The caller owns the returned buffer and is responsible for releasing it
// with free(); strdup() yields nullptr when the allocation fails.
// The argument is taken by const reference so the string is not copied a
// second time just to be duplicated.
static char * string_to_ptr(const std::string & s) {
    return strdup(s.c_str());
}
73+
6174
static int whisper_bench_full(const whisper_params & params) {
6275
// whisper init
6376

6477
struct whisper_context_params cparams = whisper_context_default_params();
6578

6679
cparams.use_gpu = params.use_gpu;
6780
cparams.flash_attn = params.flash_attn;
68-
81+
cparams.path_openvino = string_to_ptr(params.openvino_dir);
82+
cparams.path_coreml = string_to_ptr(params.coreml_dir);
83+
cparams.disable_coreml = params.disable_coreml;
84+
6985
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
7086

7187
{

examples/cli/cli.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ struct whisper_params {
9090
// A regular expression that matches tokens to suppress
9191
std::string suppress_regex;
9292

93+
std::string openvino_directory = "";
94+
std::string coreml_directory = "";
95+
bool disable_coreml = false;
96+
9397
std::string openvino_encode_device = "CPU";
9498

9599
std::string dtw = "";
@@ -186,6 +190,9 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
186190
else if ( arg == "--prompt") { params.prompt = ARGV_NEXT; }
187191
else if (arg == "-m" || arg == "--model") { params.model = ARGV_NEXT; }
188192
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(ARGV_NEXT); }
193+
else if (arg == "-dov" || arg == "--ov-directory") { params.openvino_directory = ARGV_NEXT; }
194+
else if (arg == "-docml"|| arg == "--ocml-directory") { params.coreml_directory = ARGV_NEXT; }
195+
else if (arg == "-noml" || arg == "--disable-coreml") { params.disable_coreml = true; }
189196
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = ARGV_NEXT; }
190197
else if (arg == "-dtw" || arg == "--dtw") { params.dtw = ARGV_NEXT; }
191198
else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
@@ -265,6 +272,8 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
265272
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
266273
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input audio file path\n", "");
267274
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
275+
fprintf(stderr, " -dov DN, --ov-directory DN [%-7s] the OpenVINO directory path\n", params.openvino_directory.c_str());
276+
fprintf(stderr, " -nlml, --disable-coreml Disable CoreML\n", params.disable_coreml ? "true" : "false");
268277
fprintf(stderr, " -dtw MODEL --dtw MODEL [%-7s] compute token-level timestamps\n", params.dtw.c_str());
269278
fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
270279
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
@@ -883,6 +892,10 @@ static void output_lrc(struct whisper_context * ctx, std::ofstream & fout, const
883892
}
884893

885894

895+
// Duplicates `s` into a freshly allocated C string via strdup().
// Ownership of the returned buffer passes to the caller, who must free() it;
// strdup() returns nullptr if it cannot allocate.
// `s` is bound by const reference to avoid an extra temporary copy.
static char * string_to_ptr(const std::string & s) {
    return strdup(s.c_str());
}
898+
886899
static void cb_log_disable(enum ggml_log_level , const char * , void * ) { }
887900

888901
int main(int argc, char ** argv) {
@@ -970,7 +983,10 @@ int main(int argc, char ** argv) {
970983

971984
cparams.use_gpu = params.use_gpu;
972985
cparams.flash_attn = params.flash_attn;
973-
986+
cparams.path_coreml = string_to_ptr(params.coreml_directory);
987+
cparams.path_openvino = string_to_ptr(params.openvino_directory);
988+
cparams.disable_coreml = params.disable_coreml;
989+
974990
if (!params.dtw.empty()) {
975991
cparams.dtw_token_timestamps = true;
976992
cparams.dtw_aheads_preset = WHISPER_AHEADS_NONE;

include/whisper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,12 @@ extern "C" {
125125
int dtw_n_top;
126126
struct whisper_aheads dtw_aheads;
127127

128+
// Allow coreml + openvino files to have their own directories
129+
// Purposely not ifdef'd
130+
char * path_coreml;
131+
char * path_openvino;
132+
bool disable_coreml;
133+
128134
size_t dtw_mem_size; // TODO: remove
129135
};
130136

src/whisper.cpp

Lines changed: 94 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
#include <string>
3636
#include <thread>
3737
#include <vector>
38+
#include <sys/stat.h> // For data_file_exists
39+
40+
#define NDEBUG
3841

3942
#if defined(WHISPER_BIG_ENDIAN)
4043
template<typename T>
@@ -979,6 +982,9 @@ struct whisper_context {
979982
whisper_state * state = nullptr;
980983

981984
std::string path_model; // populated by whisper_init_from_file_with_params()
985+
986+
std::string path_coreml; // populated by whisper_init_from_file_with_params()
987+
std::string path_openvino; // populated by whisper_init_from_file_with_params()
982988
};
983989

984990
struct whisper_global {
@@ -3341,9 +3347,56 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
33413347
// interface implementation
33423348
//
33433349

3350+
// Returns true when stat() succeeds for `filename`, i.e. the path refers to
// an existing file-system entry (regular file or directory) that can be
// queried by the current process.
static bool data_file_exists(const char * filename) {
    struct stat info;
    return stat(filename, &info) == 0;
}

// Moves the file-name component of `path_bin` into the directory
// `replacement_path` and returns the combined path.
//
//   path_bin         - original path; only the part after the last '/' or
//                      '\\' is kept
//   replacement_path - directory that should contain the file
//   must_exist       - when true, both `replacement_path` and the combined
//                      path must exist on disk; otherwise `path_bin` is
//                      returned unchanged
//
// Returns the combined path, or `path_bin` when an existence check fails.
static std::string replace_extra_data_directory(const std::string & path_bin, const std::string & replacement_path, bool must_exist = true) {
    // The replacement directory itself has to be present when existence is required.
    if (must_exist && !data_file_exists(replacement_path.c_str())) {
        // NOTE(review): this message is compiled in only when NDEBUG is
        // defined, which is the reverse of the usual debug-only convention -
        // confirm that this is intended.
#ifdef NDEBUG
        fprintf(stderr, "Trying to replace with non-existant path %s returning passed path %s\n", replacement_path.c_str(), path_bin.c_str());
#endif
        return path_bin;
    }

    // Keep only the file name. Both separators are searched for because
    // Windows accepts '/' as well as '\\', while Linux/macOS use '/'.
    std::string file_part = path_bin;
    const auto name_pos = file_part.find_last_of("/\\");
    if (name_pos != std::string::npos) {
        file_part = file_part.substr(name_pos + 1);
    }

    std::string new_path = replacement_path;

    // Add a separator unless the directory already ends with one. Looking at
    // the last character (rather than the position of the last separator)
    // also handles a directory that contains no separator at all, such as
    // "models", which previously was joined to the file name without one.
    const bool ends_with_separator =
        !new_path.empty() && (new_path.back() == '/' || new_path.back() == '\\');
    if (!new_path.empty() && !ends_with_separator) {
#ifdef _WIN32
        new_path += "\\";
#else
        new_path += "/";
#endif
    }

    new_path += file_part;

    // The relocated file must be present too when existence is required.
    if (must_exist && !data_file_exists(new_path.c_str())) {
#ifdef NDEBUG
        fprintf(stderr, "Error replacing path %s returning passed path %s\n", replacement_path.c_str(), path_bin.c_str());
#endif
        return path_bin;
    }

    return new_path;
}
3396+
33443397
#ifdef WHISPER_USE_COREML
33453398
// replace .bin with -encoder.mlmodelc
3346-
static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
3399+
static std::string whisper_get_coreml_path_encoder(std::string path_bin, std::string path_coreml) {
33473400
auto pos = path_bin.rfind('.');
33483401
if (pos != std::string::npos) {
33493402
path_bin = path_bin.substr(0, pos);
@@ -3360,31 +3413,47 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
33603413

33613414
path_bin += "-encoder.mlmodelc";
33623415

3416+
if(!path_coreml.empty()) {
3417+
path_bin = replace_extra_data_directory(path_bin, path_coreml);
3418+
fprintf(stderr, "Replacement CoreML path %s\n", path_bin.c_str());
3419+
}
3420+
33633421
return path_bin;
33643422
}
33653423
#endif
33663424

33673425
#ifdef WHISPER_USE_OPENVINO
33683426
// replace .bin with-encoder-openvino.xml
3369-
static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
3427+
static std::string whisper_openvino_get_path_encoder(std::string path_bin, std::string path_openvino) {
33703428
auto pos = path_bin.rfind('.');
33713429
if (pos != std::string::npos) {
33723430
path_bin = path_bin.substr(0, pos);
33733431
}
33743432

33753433
path_bin += "-encoder-openvino.xml";
33763434

3435+
if(!path_openvino.empty()) {
3436+
path_bin = replace_extra_data_directory(path_bin, path_openvino);
3437+
fprintf(stderr, "Replacement OpenVINO path %s\n", path_bin.c_str());
3438+
}
3439+
33773440
return path_bin;
33783441
}
33793442

3380-
static std::string whisper_openvino_get_path_cache(std::string path_bin) {
3443+
static std::string whisper_openvino_get_path_cache(std::string path_bin, std::string path_openvino) {
33813444
auto pos = path_bin.rfind('.');
33823445
if (pos != std::string::npos) {
33833446
path_bin = path_bin.substr(0, pos);
33843447
}
33853448

33863449
path_bin += "-encoder-openvino-cache";
33873450

3451+
if(!path_openvino.empty()) {
3452+
// This path doesn't have to exist as it may be created
3453+
path_bin = replace_extra_data_directory(path_bin, path_openvino, false);
3454+
fprintf(stderr, "Replacement OpenVINO cache path %s\n", path_bin.c_str());
3455+
}
3456+
33883457
return path_bin;
33893458
}
33903459
#endif
@@ -3456,20 +3525,22 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
34563525
}
34573526

34583527
#ifdef WHISPER_USE_COREML
3459-
const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model);
3528+
if(!ctx->params.disable_coreml) {
3529+
const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model, ctx->params.path_coreml);
34603530

3461-
WHISPER_LOG_INFO("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
3462-
WHISPER_LOG_INFO("%s: first run on a device may take a while ...\n", __func__);
3531+
WHISPER_LOG_INFO("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
3532+
WHISPER_LOG_INFO("%s: first run on a device may take a while ...\n", __func__);
34633533

3464-
state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
3465-
if (!state->ctx_coreml) {
3466-
WHISPER_LOG_ERROR("%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
3534+
state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
3535+
if (!state->ctx_coreml) {
3536+
WHISPER_LOG_ERROR("%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
34673537
#ifndef WHISPER_COREML_ALLOW_FALLBACK
3468-
whisper_free_state(state);
3469-
return nullptr;
3538+
whisper_free_state(state);
3539+
return nullptr;
34703540
#endif
3471-
} else {
3472-
WHISPER_LOG_INFO("%s: Core ML model loaded\n", __func__);
3541+
} else {
3542+
WHISPER_LOG_INFO("%s: Core ML model loaded\n", __func__);
3543+
}
34733544
}
34743545
#endif
34753546

@@ -3585,17 +3656,17 @@ int whisper_ctx_init_openvino_encoder_with_state(
35853656
std::string path_encoder;
35863657
if (!model_path) {
35873658
//if model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
3588-
path_encoder = whisper_openvino_get_path_encoder(ctx->path_model);
3659+
path_encoder = whisper_openvino_get_path_encoder(ctx->path_model, ctx->params.path_openvino);
35893660
} else {
3590-
path_encoder = model_path;
3661+
path_encoder = replace_extra_data_directory(model_path, ctx->params.path_openvino);
35913662
}
35923663

35933664
std::string path_cache;
35943665
if (!cache_dir) {
35953666
//if cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
3596-
path_cache = whisper_openvino_get_path_cache(ctx->path_model);
3667+
path_cache = whisper_openvino_get_path_cache(ctx->path_model, ctx->params.path_openvino);
35973668
} else {
3598-
path_cache = cache_dir;
3669+
path_cache = replace_extra_data_directory(cache_dir, ctx->params.path_openvino);
35993670
}
36003671

36013672
WHISPER_LOG_INFO("%s: loading OpenVINO model from '%s'\n", __func__, path_encoder.c_str());
@@ -3634,11 +3705,17 @@ struct whisper_context_params whisper_context_default_params() {
36343705
/*.n_heads =*/ 0,
36353706
/*.heads =*/ NULL,
36363707
},
3708+
/*.path_coreml =*/ nullptr,
3709+
/*.path_openvino =*/ nullptr,
3710+
/*.disable_coreml =*/ false,
36373711
/*.dtw_mem_size =*/ 1024*1024*128,
36383712
};
36393713
return result;
36403714
}
36413715

3716+
// std::string path_coreml; // populated by whisper_init_from_file_with_params()
3717+
// std::string path_openvino; // populated by whisper_init_from_file_with_params()
3718+
36423719
struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params) {
36433720
WHISPER_LOG_INFO("%s: loading model from '%s'\n", __func__, path_model);
36443721
#ifdef _MSC_VER

0 commit comments

Comments
 (0)