
Commit ad74b66

Merge pull request #8 from tc-mb/support-coreml-in-server
Support coreml in server
2 parents 5161a16 + 794c322 commit ad74b66

File tree

3 files changed: 61 additions & 13 deletions


tools/omni/omni-cli.cpp

Lines changed: 9 additions & 8 deletions
@@ -103,12 +103,13 @@ static void sigint_handler(int signo) {
 // └── vision/
 //     └── MiniCPM-o-4_5-vision-F16.gguf
 struct OmniModelPaths {
-    std::string llm;       // LLM model path
-    std::string vision;    // vision model path
-    std::string audio;     // audio model path
-    std::string tts;       // TTS model path
-    std::string projector; // projector model path
-    std::string base_dir;  // model root directory
+    std::string llm;           // LLM model path
+    std::string vision;        // vision model path
+    std::string audio;         // audio model path
+    std::string tts;           // TTS model path
+    std::string projector;     // projector model path
+    std::string vision_coreml; // CoreML vision model path (.mlmodelc)
+    std::string base_dir;      // model root directory
 };
 
 static std::string get_parent_dir(const std::string & path) {
@@ -138,6 +139,7 @@ static OmniModelPaths resolve_model_paths(const std::string & llm_path) {
     paths.audio = paths.base_dir + "/audio/MiniCPM-o-4_5-audio-F16.gguf";
     paths.tts = paths.base_dir + "/tts/MiniCPM-o-4_5-tts-F16.gguf";
     paths.projector = paths.base_dir + "/tts/MiniCPM-o-4_5-projector-F16.gguf";
+    paths.vision_coreml = paths.base_dir + "/vision/coreml_minicpmo45_vit_all_f16.mlmodelc";
 
     return paths;
 }
@@ -315,8 +317,7 @@ int main(int argc, char ** argv) {
     // Only set the CoreML model path when the coreml backend is explicitly selected
     if (vision_backend == "coreml") {
         if (vision_coreml_model_path.empty()) {
-            fprintf(stderr, "Error: --vision-backend coreml requires --vision-coreml <path>\n");
-            return 1;
+            vision_coreml_model_path = paths.vision_coreml;
         }
         params.vision_coreml_model_path = vision_coreml_model_path;
     }
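
The fallback mirrors resolve_model_paths(): when --vision-coreml is omitted, the CLI now derives the .mlmodelc location from the LLM path instead of erroring out. A standalone sketch of this resolve-then-override pattern (the helper base_dir_of is illustrative, not code from the PR):

// Minimal sketch (not the PR's code): derive a default CoreML path from the
// LLM model path and let an explicit CLI value override it.
#include <cstdio>
#include <string>

// Hypothetical helper mirroring get_parent_dir(): model root is two levels
// above .../llm/model.gguf.
static std::string base_dir_of(const std::string & llm_path) {
    auto parent = [](const std::string & p) {
        size_t pos = p.find_last_of("/\\");
        return pos == std::string::npos ? std::string(".") : p.substr(0, pos);
    };
    return parent(parent(llm_path));
}

int main(int argc, char ** argv) {
    std::string llm_path      = argc > 1 ? argv[1] : "models/llm/MiniCPM-o-4_5-F16.gguf";
    std::string vision_coreml = argc > 2 ? argv[2] : ""; // would come from --vision-coreml
    if (vision_coreml.empty()) {
        // Same fallback the commit introduces: a conventional default instead of an error.
        vision_coreml = base_dir_of(llm_path) + "/vision/coreml_minicpmo45_vit_all_f16.mlmodelc";
    }
    printf("CoreML vision model: %s\n", vision_coreml.c_str());
    return 0;
}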

tools/omni/omni.cpp

Lines changed: 40 additions & 5 deletions
@@ -3421,7 +3421,11 @@ void print_with_timestamp(const char* format, ...)
 
     // Format the timestamp
     std::tm buf;
+#ifdef _WIN32
+    localtime_s(&buf, &in_time_t);
+#else
     localtime_r(&in_time_t, &buf);
+#endif
     std::cout << std::put_time(&buf, "%H:%M:%S") << '.' << std::setfill('0') << std::setw(3) << ms.count() << " ";
 
     // Print the formatted string
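
The #ifdef is needed because Windows has no localtime_r; MSVC's localtime_s takes the destination struct first, while POSIX localtime_r takes the time first. A compilable sketch of the same shim factored into a helper (an assumed refactoring, not code from the PR):

// Sketch: portable local-time conversion matching the #ifdef in the diff.
#include <chrono>
#include <ctime>
#include <iomanip>
#include <iostream>

static std::tm local_tm(std::time_t t) {
    std::tm buf{};
#ifdef _WIN32
    localtime_s(&buf, &t);   // MSVC signature: (struct tm *, const time_t *)
#else
    localtime_r(&t, &buf);   // POSIX signature: (const time_t *, struct tm *)
#endif
    return buf;
}

int main() {
    auto now = std::chrono::system_clock::now();
    auto ms  = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000;
    std::tm buf = local_tm(std::chrono::system_clock::to_time_t(now));
    std::cout << std::put_time(&buf, "%H:%M:%S") << '.'
              << std::setfill('0') << std::setw(3) << ms.count() << '\n';
    return 0;
}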
@@ -3694,14 +3698,14 @@ struct omni_context * omni_init(struct common_params * params, int media_type, b
     ctx_omni->ctx_vision = ctx_vision;
 
     // Set CoreML model path if available (for vision ANE acceleration)
+    // Note: .mlmodelc is a directory, not a file, so use stat instead of ifstream
     if (ctx_vision && !ctx_omni->params->vision_coreml_model_path.empty()) {
-        std::ifstream coreml_file(ctx_omni->params->vision_coreml_model_path);
-        if (coreml_file.good()) {
-            coreml_file.close();
+        struct stat coreml_stat;
+        if (stat(ctx_omni->params->vision_coreml_model_path.c_str(), &coreml_stat) == 0) {
             vision_set_coreml_model_path(ctx_vision, ctx_omni->params->vision_coreml_model_path.c_str());
             LOG_INF("Vision CoreML model path set to: %s\n", ctx_omni->params->vision_coreml_model_path.c_str());
         } else {
-            LOG_WRN("Vision CoreML model file does not exist: %s, skipping ANE\n", ctx_omni->params->vision_coreml_model_path.c_str());
+            LOG_WRN("Vision CoreML model path does not exist: %s, skipping ANE\n", ctx_omni->params->vision_coreml_model_path.c_str());
         }
     }
 }
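
The switch from ifstream to stat() matters because a compiled Core ML model (.mlmodelc) is a bundle directory: opening it with std::ifstream fails even when it exists, whereas stat() succeeds for files and directories alike. A minimal POSIX-flavored sketch of such a check (standalone, not the PR's code):

// Sketch: check that a .mlmodelc path exists, accepting directories.
// stat() succeeds for both files and directories; ifstream fails on the latter.
#include <sys/stat.h>
#include <cstdio>

static bool path_exists(const char * path, bool * is_dir = nullptr) {
    struct stat st;
    if (stat(path, &st) != 0) {
        return false; // does not exist (or is not accessible)
    }
    if (is_dir) {
        *is_dir = S_ISDIR(st.st_mode) != 0;
    }
    return true;
}

int main() {
    const char * p = "vision/coreml_minicpmo45_vit_all_f16.mlmodelc";
    bool is_dir = false;
    if (path_exists(p, &is_dir)) {
        printf("%s exists (%s)\n", p, is_dir ? "directory" : "file");
    } else {
        printf("%s missing, would skip ANE\n", p);
    }
    return 0;
}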
@@ -5286,7 +5290,11 @@ static void move_old_output_to_archive() {
     }
 
     // Check if directory has any files/subdirectories
+#ifdef _WIN32
+    std::string cmd = "dir /b \"" + dir_path + "\" 2>NUL | findstr /r \".\" >NUL 2>&1";
+#else
     std::string cmd = "test -n \"$(ls -A " + dir_path + " 2>/dev/null)\"";
+#endif
     int ret = system(cmd.c_str());
     return (ret == 0); // Returns 0 if directory has content
 };
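
Both shell commands only answer "does this directory contain anything?". As a design note: where C++17 is available, std::filesystem answers the same question without spawning a shell. A hedged alternative sketch (an assumption about the toolchain, not what the PR does):

// Sketch: directory-has-content check without a shell (C++17).
#include <filesystem>
#include <string>
#include <cstdio>

static bool dir_has_content(const std::string & dir_path) {
    std::error_code ec;
    // is_empty() reports an empty directory; invert, and treat errors
    // (missing path, permissions) as "no content".
    bool empty = std::filesystem::is_empty(dir_path, ec);
    return !ec && !empty;
}

int main() {
    printf("output/ has content: %s\n", dir_has_content("output") ? "yes" : "no");
    return 0;
}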
@@ -5315,10 +5323,29 @@ static void move_old_output_to_archive() {
 
     // Find maximum ID in old_output directory
     int max_id = -1;
+#ifdef _WIN32
+    std::string find_cmd = "dir /b \"" + old_output_base + "\" 2>NUL";
+#else
     std::string find_cmd = "ls -1 " + old_output_base + " 2>/dev/null | grep -E '^[0-9]+$' | sort -n | tail -1";
+#endif
     FILE* pipe = popen(find_cmd.c_str(), "r");
     if (pipe) {
         char buffer[128];
+#ifdef _WIN32
+        // On Windows, read all entries and find the max numeric ID
+        while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
+            std::string result(buffer);
+            while (!result.empty() && (result.back() == '\n' || result.back() == '\r')) {
+                result.pop_back();
+            }
+            if (!result.empty()) {
+                try {
+                    int id = std::stoi(result);
+                    if (id > max_id) max_id = id;
+                } catch (...) {}
+            }
+        }
+#else
         if (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
             std::string result(buffer);
             // Remove trailing newline
@@ -5333,6 +5360,7 @@ static void move_old_output_to_archive() {
             }
         }
     }
+#endif
     pclose(pipe);
 }
 
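The popen pipeline (dir /b on Windows, ls | grep | sort | tail elsewhere) scans old_output for the largest all-numeric directory name. The same scan can be done shell-free; a C++17 alternative sketch (assumed toolchain, not the PR's approach):

// Sketch: find the largest all-digit directory entry name without popen (C++17).
#include <filesystem>
#include <string>
#include <cstdio>

static int max_numeric_entry(const std::string & base) {
    namespace fs = std::filesystem;
    int max_id = -1;
    std::error_code ec;
    for (const auto & entry : fs::directory_iterator(base, ec)) {
        std::string name = entry.path().filename().string();
        // Same filter as grep -E '^[0-9]+$': all-digit names only.
        if (name.empty() || name.find_first_not_of("0123456789") != std::string::npos) {
            continue;
        }
        try {
            int id = std::stoi(name);
            if (id > max_id) max_id = id;
        } catch (...) {
            // out-of-range values are ignored, like the diff's catch (...)
        }
    }
    return max_id; // -1 when the directory is missing or has no numeric entries
}

int main() {
    printf("max id: %d\n", max_numeric_entry("old_output"));
    return 0;
}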
@@ -8517,10 +8545,17 @@ bool stream_prefill(struct omni_context * ctx_omni, std::string aud_fname, std:
     // Only wait for the previous round of TTS to finish at the start of a new round (index == 0)
     // Later prefills within the same round (index >= 1) do not need to wait
     if (ctx_omni->use_tts && index == 0 && ctx_omni->warmup_done.load() && !ctx_omni->duplex_mode) {
+        // If break_event has already fired, skip the wait (the previous round was interrupted)
+        if (ctx_omni->break_event.load()) {
+            print_with_timestamp("TTS: break_event active, skipping wait for previous round\n");
+            ctx_omni->speek_done = true;
+            ctx_omni->break_event.store(false);
+            speek_cv.notify_all();
+        }
         print_with_timestamp("TTS: waiting for the previous round of speech generation to finish\n");
         std::unique_lock<std::mutex> lock(speek_mtx);
         // Wait with a timeout to avoid getting stuck forever
-        auto wait_result = speek_cv.wait_for(lock, std::chrono::seconds(30), [&]{return ctx_omni->speek_done; });
+        auto wait_result = speek_cv.wait_for(lock, std::chrono::seconds(5), [&]{return ctx_omni->speek_done || ctx_omni->break_event.load(); });
         if (!wait_result) {
             // Force it to true so execution can continue
             ctx_omni->speek_done = true;
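
The interplay of speek_done, break_event, the notify, and the shortened 5-second timeout is easier to follow in isolation. A compilable toy model of the wait logic (flag and variable names copied from the diff; the surrounding toy_ctx struct and TTS thread are invented for the sketch):

// Sketch: wait for the previous TTS round, but wake early on interruption.
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

struct toy_ctx {
    std::atomic<bool> break_event{false};
    bool speek_done = false;
};

static std::mutex speek_mtx;
static std::condition_variable speek_cv;

static void wait_previous_round(toy_ctx & ctx) {
    if (ctx.break_event.load()) {
        // Previous round was interrupted: don't wait, reset the flag.
        ctx.speek_done = true;
        ctx.break_event.store(false);
        speek_cv.notify_all();
    }
    std::unique_lock<std::mutex> lock(speek_mtx);
    // 5 s cap (down from 30 s); break_event also ends the wait.
    bool ok = speek_cv.wait_for(lock, std::chrono::seconds(5),
                                [&] { return ctx.speek_done || ctx.break_event.load(); });
    if (!ok) {
        ctx.speek_done = true; // timed out: force progress instead of hanging
    }
}

int main() {
    toy_ctx ctx;
    std::thread tts([&] {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        { std::lock_guard<std::mutex> lk(speek_mtx); ctx.speek_done = true; }
        speek_cv.notify_all();
    });
    wait_previous_round(ctx);
    printf("previous round finished, starting prefill\n");
    tts.join();
    return 0;
}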

tools/server/server.cpp

Lines changed: 12 additions & 0 deletions
@@ -5786,6 +5786,18 @@ int main(int argc, char ** argv) {
     params.tts_model = model_dir_normalized + "tts/MiniCPM-o-4_5-tts-F16.gguf";
     // The LLM model path is given by llama-server's --model argument at startup; no need to set it here
     // params.model.path is already provided by ctx_server.model
+
+    // Vision encoder backend: "metal" (default, GPU) or "coreml" (ANE acceleration)
+    std::string vision_backend = json_value(data, "vision_backend", std::string("metal"));
+    if (vision_backend == "coreml") {
+        // CoreML mode: automatically look for the .mlmodelc under model_dir/vision/
+        std::string vision_coreml = json_value(data, "vision_coreml_model_path",
+            std::string(model_dir_normalized + "vision/coreml_minicpmo45_vit_all_f16.mlmodelc"));
+        params.vision_coreml_model_path = vision_coreml;
+    } else {
+        // Metal (GPU) mode: do not set a CoreML path
+        params.vision_coreml_model_path = "";
+    }
 
     {
         std::lock_guard<std::mutex> lock(ctx_server.octx_mutex);
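
Server-side this is the same resolve-then-override pattern as the CLI, driven by fields in the request JSON. A standalone sketch using nlohmann::json (which the llama.cpp server already uses; the json_value helper is re-implemented locally here so the snippet compiles on its own):

// Sketch: pick the vision backend from a request body, with a CoreML fallback path.
#include <nlohmann/json.hpp>
#include <cstdio>
#include <string>

using json = nlohmann::json;

// Local stand-in for the server's json_value() helper: field value or default.
template <typename T>
static T json_value(const json & body, const std::string & key, const T & def) {
    return body.contains(key) && !body.at(key).is_null() ? body.at(key).get<T>() : def;
}

int main() {
    json data = json::parse(R"({"vision_backend": "coreml"})");
    std::string model_dir = "models/"; // stands in for model_dir_normalized

    std::string vision_backend = json_value(data, "vision_backend", std::string("metal"));
    std::string coreml_path;
    if (vision_backend == "coreml") {
        // An explicit path in the request wins; otherwise fall back to the conventional location.
        coreml_path = json_value(data, "vision_coreml_model_path",
                                 std::string(model_dir + "vision/coreml_minicpmo45_vit_all_f16.mlmodelc"));
    }
    printf("backend=%s coreml_path=%s\n", vision_backend.c_str(),
           coreml_path.empty() ? "(none)" : coreml_path.c_str());
    return 0;
}

A request carrying {"vision_backend": "coreml"} thus enables ANE acceleration without the client having to know where the .mlmodelc lives.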
