Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion examples/bench/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ struct whisper_params {
int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat

std::string model = "models/ggml-base.en.bin";
std::string coreml_dir = "";
std::string openvino_dir = "";
bool disable_coreml = false;
Comment on lines +14 to +16
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a good change - we should not pass backend-specific info through the libwhisper API.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

--ov-e-device in the CLI appears rather backend-specific as well, doesn't it?


bool use_gpu = true;
bool flash_attn = false;
Expand All @@ -28,7 +31,10 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
}
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-l" || arg == "--coreml") { params.coreml_dir = argv[++i]; }
else if (arg == "-v" || arg == "--openvino") { params.openvino_dir = argv[++i]; }
else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
else if (arg == "-ml" || arg == "--disable-ml") { params.disable_coreml = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; }
else {
Expand All @@ -55,17 +61,31 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, " -ml, --disable-ml [%-7s] disable CoreML\n", params.disable_coreml ? "true" : "false");
fprintf(stderr, " -l --coreml [%-7s] Set CoreML Directory\n", params.coreml_dir.c_str());
fprintf(stderr, " -v --openvino [%-7s] Set OpenVINO Directory\n", params.openvino_dir.c_str());
fprintf(stderr, "\n");
}

// Duplicate a std::string into a heap-allocated C string (for the C API in
// whisper_context_params, which stores plain char* pointers).
// The caller owns the returned buffer and must release it with free().
// Takes the string by const reference to avoid an unnecessary copy
// (the original passed by value).
static char * string_to_ptr(const std::string & s) {
    return strdup(s.c_str());
}

static int whisper_bench_full(const whisper_params & params) {
// whisper init

struct whisper_context_params cparams = whisper_context_default_params();
char * sptr1 = nullptr;
char * sptr2 = nullptr;

cparams.use_gpu = params.use_gpu;
cparams.flash_attn = params.flash_attn;

sptr1 = string_to_ptr(params.coreml_dir);
cparams.path_openvino = sptr1;
sptr2 = string_to_ptr(params.openvino_dir);
cparams.path_coreml = sptr2;
cparams.disable_coreml = params.disable_coreml;

struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);

{
Expand Down Expand Up @@ -138,6 +158,8 @@ static int whisper_bench_full(const whisper_params & params) {
}

whisper_print_timings(ctx);
free(sptr2);
free(sptr1);
whisper_free(ctx);

fprintf(stderr, "\n");
Expand Down
25 changes: 24 additions & 1 deletion examples/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ struct whisper_params {
// A regular expression that matches tokens to suppress
std::string suppress_regex;

std::string openvino_directory = "";
std::string coreml_directory = "";
bool disable_coreml = false;

std::string openvino_encode_device = "CPU";

std::string dtw = "";
Expand Down Expand Up @@ -186,6 +190,9 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
else if ( arg == "--prompt") { params.prompt = ARGV_NEXT; }
else if (arg == "-m" || arg == "--model") { params.model = ARGV_NEXT; }
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(ARGV_NEXT); }
else if (arg == "-dov" || arg == "--ov-directory") { params.openvino_directory = ARGV_NEXT; }
else if (arg == "-docml"|| arg == "--ocml-directory") { params.coreml_directory = ARGV_NEXT; }
else if (arg == "-noml" || arg == "--disable-coreml") { params.disable_coreml = true; }
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = ARGV_NEXT; }
else if (arg == "-dtw" || arg == "--dtw") { params.dtw = ARGV_NEXT; }
else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
Expand Down Expand Up @@ -265,6 +272,8 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input audio file path\n", "");
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
fprintf(stderr, " -dov DN, --ov-directory DN [%-7s] the OpenVINO directory path\n", params.openvino_directory.c_str());
fprintf(stderr, " -noml, --disable-coreml [%-7s] Disable CoreML\n", params.disable_coreml ? "true" : "false");
fprintf(stderr, " -dtw MODEL --dtw MODEL [%-7s] compute token-level timestamps\n", params.dtw.c_str());
fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
Expand Down Expand Up @@ -883,6 +892,10 @@ static void output_lrc(struct whisper_context * ctx, std::ofstream & fout, const
}


// Duplicate a std::string into a heap-allocated C string (for the C API in
// whisper_context_params, which stores plain char* pointers).
// The caller owns the returned buffer and must release it with free().
// Takes the string by const reference to avoid an unnecessary copy
// (the original passed by value).
static char * string_to_ptr(const std::string & s) {
    return strdup(s.c_str());
}

static void cb_log_disable(enum ggml_log_level , const char * , void * ) { }

int main(int argc, char ** argv) {
Expand All @@ -894,6 +907,8 @@ int main(int argc, char ** argv) {
#endif

whisper_params params;
char * sptr1 = nullptr;
char * sptr2 = nullptr;

// If the only argument starts with "@", read arguments line-by-line
// from the given file.
Expand Down Expand Up @@ -970,7 +985,12 @@ int main(int argc, char ** argv) {

cparams.use_gpu = params.use_gpu;
cparams.flash_attn = params.flash_attn;

sptr1 = string_to_ptr(params.coreml_directory);
cparams.path_coreml = sptr1;
sptr2 = string_to_ptr(params.openvino_directory);
cparams.path_openvino = sptr2;
cparams.disable_coreml = params.disable_coreml;

if (!params.dtw.empty()) {
cparams.dtw_token_timestamps = true;
cparams.dtw_aheads_preset = WHISPER_AHEADS_NONE;
Expand Down Expand Up @@ -1263,6 +1283,9 @@ int main(int argc, char ** argv) {
if (!params.no_prints) {
whisper_print_timings(ctx);
}
free(sptr2);
free(sptr1);

whisper_free(ctx);

return 0;
Expand Down
6 changes: 6 additions & 0 deletions include/whisper.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ extern "C" {
int dtw_n_top;
struct whisper_aheads dtw_aheads;

// Allow coreml + openvino files to have their own directories
// Purposely not ifdef'd
char * path_coreml;
char * path_openvino;
bool disable_coreml;

size_t dtw_mem_size; // TODO: remove
};

Expand Down
102 changes: 85 additions & 17 deletions src/whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <string>
#include <thread>
#include <vector>
#include <sys/stat.h> // For data_file_exists

#if defined(WHISPER_BIG_ENDIAN)
template<typename T>
Expand Down Expand Up @@ -979,6 +980,9 @@ struct whisper_context {
whisper_state * state = nullptr;

std::string path_model; // populated by whisper_init_from_file_with_params()

std::string path_coreml; // populated by whisper_init_from_file_with_params()
std::string path_openvino; // populated by whisper_init_from_file_with_params()
};

struct whisper_global {
Expand Down Expand Up @@ -3341,9 +3345,52 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
// interface implementation
//

// Return true if a file or directory exists at the given path.
static bool data_file_exists (const char * filename) {
    struct stat buffer;
    return (stat (filename, &buffer) == 0);
}

// Re-root the file-name component of `path_bin` into `replacement_path`:
// "models/ggml-base.bin" + "/opt/coreml" -> "/opt/coreml/ggml-base.bin".
//
// path_bin         - original path; only its final component is kept.
// replacement_path - directory that should contain the file instead.
// must_exist       - when true, both `replacement_path` and the resulting
//                    path must exist on disk; otherwise `path_bin` is
//                    returned unchanged (with a message on stderr).
static std::string replace_extra_data_directory(std::string path_bin, std::string replacement_path, bool must_exist = true) {
    std::string new_path  = replacement_path;
    std::string file_part = path_bin;

    // Check replacement_path actually exists
    if (must_exist && !data_file_exists(new_path.c_str())) {
        fprintf(stderr, "Trying to replace with non-existent path %s returning passed path %s\n", replacement_path.c_str(), path_bin.c_str());
        return path_bin;
    }

    // Keep only the file-name component of path_bin.
    // Win 10/11 accepts both slashes while Linux/Mac only uses /
    auto pos = file_part.find_last_of("/\\");
    if (pos != std::string::npos) {
        file_part = file_part.substr(pos + 1);
    }

    // Append a trailing separator unless the directory already ends with one.
    // NOTE: the previous check (`pos < new_path.length() - 1` on the position
    // of the last slash anywhere in the path) skipped the separator entirely
    // when the directory contained no slash, producing e.g.
    // "models" + "file.bin" -> "modelsfile.bin".
    if (!new_path.empty() && new_path.back() != '/' && new_path.back() != '\\') {
#ifdef _WIN32
        new_path += "\\";
#else
        new_path += "/";
#endif
    }

    new_path += file_part;

    if (must_exist && !data_file_exists(new_path.c_str())) {
        fprintf(stderr, "Error replacing path %s returning passed path %s\n", replacement_path.c_str(), path_bin.c_str());
        return path_bin;
    }

    return new_path;
}

#ifdef WHISPER_USE_COREML
// replace .bin with -encoder.mlmodelc
static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
static std::string whisper_get_coreml_path_encoder(std::string path_bin, std::string path_coreml) {
auto pos = path_bin.rfind('.');
if (pos != std::string::npos) {
path_bin = path_bin.substr(0, pos);
Expand All @@ -3360,31 +3407,47 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {

path_bin += "-encoder.mlmodelc";

if(!path_coreml.empty()) {
path_bin = replace_extra_data_directory(path_bin, path_coreml);
fprintf(stderr, "Replacement CoreML path %s\n", path_bin.c_str());
}

return path_bin;
}
#endif

#ifdef WHISPER_USE_OPENVINO
// replace .bin with -encoder-openvino.xml
static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
static std::string whisper_openvino_get_path_encoder(std::string path_bin, std::string path_openvino) {
auto pos = path_bin.rfind('.');
if (pos != std::string::npos) {
path_bin = path_bin.substr(0, pos);
}

path_bin += "-encoder-openvino.xml";

if(!path_openvino.empty()) {
path_bin = replace_extra_data_directory(path_bin, path_openvino);
fprintf(stderr, "Replacement OpenVINO path %s\n", path_bin.c_str());
}

return path_bin;
}

static std::string whisper_openvino_get_path_cache(std::string path_bin) {
static std::string whisper_openvino_get_path_cache(std::string path_bin, std::string path_openvino) {
auto pos = path_bin.rfind('.');
if (pos != std::string::npos) {
path_bin = path_bin.substr(0, pos);
}

path_bin += "-encoder-openvino-cache";

if(!path_openvino.empty()) {
// This path doesn't have to exist as it may be created
path_bin = replace_extra_data_directory(path_bin, path_openvino, false);
fprintf(stderr, "Replacement OpenVINO cache path %s\n", path_bin.c_str());
}

return path_bin;
}
#endif
Expand Down Expand Up @@ -3456,20 +3519,22 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
}

#ifdef WHISPER_USE_COREML
const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model);
if(!ctx->params.disable_coreml) {
const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model, ctx->params.path_coreml);

WHISPER_LOG_INFO("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
WHISPER_LOG_INFO("%s: first run on a device may take a while ...\n", __func__);
WHISPER_LOG_INFO("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
WHISPER_LOG_INFO("%s: first run on a device may take a while ...\n", __func__);

state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
if (!state->ctx_coreml) {
WHISPER_LOG_ERROR("%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
if (!state->ctx_coreml) {
WHISPER_LOG_ERROR("%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
#ifndef WHISPER_COREML_ALLOW_FALLBACK
whisper_free_state(state);
return nullptr;
whisper_free_state(state);
return nullptr;
#endif
} else {
WHISPER_LOG_INFO("%s: Core ML model loaded\n", __func__);
} else {
WHISPER_LOG_INFO("%s: Core ML model loaded\n", __func__);
}
}
#endif

Expand Down Expand Up @@ -3585,17 +3650,17 @@ int whisper_ctx_init_openvino_encoder_with_state(
std::string path_encoder;
if (!model_path) {
//if model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
path_encoder = whisper_openvino_get_path_encoder(ctx->path_model);
path_encoder = whisper_openvino_get_path_encoder(ctx->path_model, ctx->params.path_openvino);
} else {
path_encoder = model_path;
path_encoder = replace_extra_data_directory(model_path, ctx->params.path_openvino);
}

std::string path_cache;
if (!cache_dir) {
//if cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
path_cache = whisper_openvino_get_path_cache(ctx->path_model);
path_cache = whisper_openvino_get_path_cache(ctx->path_model, ctx->params.path_openvino);
} else {
path_cache = cache_dir;
path_cache = replace_extra_data_directory(cache_dir, ctx->params.path_openvino);
}

WHISPER_LOG_INFO("%s: loading OpenVINO model from '%s'\n", __func__, path_encoder.c_str());
Expand Down Expand Up @@ -3634,6 +3699,9 @@ struct whisper_context_params whisper_context_default_params() {
/*.n_heads =*/ 0,
/*.heads =*/ NULL,
},
/*.path_coreml =*/ nullptr,
/*.path_openvino =*/ nullptr,
/*.disable_coreml =*/ false,
/*.dtw_mem_size =*/ 1024*1024*128,
};
return result;
Expand Down
Loading