Skip to content

Commit 54258e9

Browse files
committed
optimized interface
1 parent 864d013 commit 54258e9

File tree

12 files changed

+89
-25
lines changed

12 files changed

+89
-25
lines changed

common/arg.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -960,6 +960,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
960960
for (auto & ex : mmproj_examples) {
961961
if (ctx_arg.ex == ex) {
962962
common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
963+
common_params_handle_model(params.ane, params.hf_token, "", params.offline);
963964
break;
964965
}
965966
}
@@ -2243,6 +2244,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
22432244
params.mmproj_use_gpu = false;
22442245
}
22452246
).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
2247+
add_opt(common_arg(
2248+
{"--ane"}, "FILE",
2249+
"path to Apple Neural Engine model file for iOS",
2250+
[](common_params & params, const std::string & value) {
2251+
params.ane.path = value;
2252+
}
2253+
).set_examples(mmproj_examples).set_env("LLAMA_ARG_ANE"));
22462254
add_opt(common_arg(
22472255
{"--image", "--audio"}, "FILE",
22482256
"path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",

common/common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,9 @@ struct common_params {
353353
bool mmproj_use_gpu = true; // use GPU for multimodal model
354354
bool no_mmproj = false; // explicitly disable multimodal model
355355
std::vector<std::string> image; // path to image file(s)
356+
357+
// Apple Neural Engine support
358+
struct common_params_model ane;
356359

357360
// embedding
358361
bool embedding = false; // get only sentence embedding

tools/mtmd/CMakeLists.txt

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
find_package(Threads REQUIRED)
44

5+
# ANE support option
6+
option(ENABLE_ANE "Enable Apple Neural Engine support" OFF)
7+
58
add_library(mtmd
69
mtmd.cpp
710
mtmd-audio.cpp
@@ -15,8 +18,8 @@ add_library(mtmd
1518
mtmd-ios.h
1619
)
1720

18-
# Add ANE related files on Apple platforms
19-
if(APPLE)
21+
# Add ANE related files when enabled
22+
if(ENABLE_ANE)
2023
target_sources(mtmd PRIVATE
2124
ane/ane.h
2225
ane/ane.mm
@@ -40,8 +43,8 @@ target_include_directories(mtmd PRIVATE ../../src)
4043
target_include_directories(mtmd PRIVATE ../../vendor)
4144
target_compile_features (mtmd PRIVATE cxx_std_17)
4245

43-
# Link CoreML and Accelerate frameworks on Apple platforms
44-
if(APPLE)
46+
# Link CoreML and Accelerate frameworks when ANE is enabled
47+
if(ENABLE_ANE)
4548
target_link_libraries(mtmd PRIVATE
4649
"-framework Foundation"
4750
"-framework CoreML"
@@ -62,8 +65,8 @@ set(MTMD_PUBLIC_HEADERS
6265
${CMAKE_CURRENT_SOURCE_DIR}/mtmd-ios.h
6366
)
6467

65-
# Add ANE public headers on Apple platforms
66-
if(APPLE)
68+
# Add ANE public headers when enabled
69+
if(ENABLE_ANE)
6770
list(APPEND MTMD_PUBLIC_HEADERS
6871
${CMAKE_CURRENT_SOURCE_DIR}/ane/ane.h
6972
)
@@ -117,8 +120,8 @@ if (TARGET common)
117120
add_dependencies(${TARGET_IOS} common)
118121
endif()
119122

120-
# Add additional linking settings for iOS testing on Apple platforms
121-
if(APPLE)
123+
# Add additional linking settings for iOS testing when ANE is enabled
124+
if(ENABLE_ANE)
122125
target_link_libraries(${TARGET_IOS} PRIVATE
123126
"-framework Foundation"
124127
"-framework CoreML"

tools/mtmd/ane/ane.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
extern "C" {
33
#endif
44

5-
const void* loadModel();
5+
const void* loadModel(const char* model_path);
66
void closeModel(const void* model);
77
void predictWith(const void* model, float* embed, float* encoderOutput);
88

tools/mtmd/ane/ane.mm

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,30 @@
88
extern "C" {
99
#endif
1010

11-
const void* loadModel() {
12-
// 新的,从 documents directionary 中加载 begin
13-
// 获取文件管理器实例
14-
NSFileManager *fileManager = [NSFileManager defaultManager];
15-
// 获取应用的 Documents 目录的 URL
16-
NSURL *documentsURL = [[fileManager URLsForDirectory:NSDocumentDirectory inDomains:NSUserDomainMask] firstObject];
17-
NSString *pathString = [documentsURL.absoluteString stringByAppendingString:@"ane_minicpmv4_vit_f16.mlmodelc"];
18-
NSURL *modelURL = [NSURL URLWithString:pathString];
19-
20-
NSLog(modelURL.absoluteString);
21-
22-
const void* model = CFBridgingRetain([[ane_minicpmv4_vit_f16 alloc] initWithContentsOfURL:modelURL error:nil]);
11+
// Loads the Core ML vision encoder stored at `model_path`.
//
// model_path: NUL-terminated UTF-8 filesystem path of the model to load.
// Returns an opaque handle that has been retained with CFBridgingRetain, or nullptr
// when the path is null, empty, not valid UTF-8, or the model could not be created.
// NOTE(review): the handle is presumably released by closeModel() — confirm there.
const void* loadModel(const char* model_path) {
    if (!model_path) {
        NSLog(@"Error: model_path is null");
        return nullptr;
    }

    // +stringWithUTF8String: yields nil for malformed UTF-8 and +fileURLWithPath:
    // raises on a nil path, so reject nil/empty before building the URL.
    NSString *pathString = [NSString stringWithUTF8String:model_path];
    if (pathString.length == 0) {
        NSLog(@"Error: model_path is empty or not valid UTF-8");
        return nullptr;
    }
    NSURL *modelURL = [NSURL fileURLWithPath:pathString];

    NSLog(@"Loading ANE model from: %@", modelURL.absoluteString);

    NSError *error = nil;
    ane_minicpmv4_vit_f16 *instance =
        [[ane_minicpmv4_vit_f16 alloc] initWithContentsOfURL:modelURL error:&error];

    // Success is decided by the returned object, not by the NSError out-parameter:
    // Cocoa only guarantees `error` is meaningful when the call itself failed.
    if (!instance) {
        if (error) {
            NSLog(@"Error loading ANE model: %@", error.localizedDescription);
        } else {
            NSLog(@"Error: Failed to create ANE model instance");
        }
        return nullptr;
    }

    // Retain only once the instance is known to be valid, so no failure path
    // can return early while holding an extra reference.
    return CFBridgingRetain(instance);
}
2537

tools/mtmd/clip.cpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,9 @@ struct clip_ctx {
380380
// for debugging
381381
bool debug_graph = false;
382382
std::vector<ggml_tensor *> debug_print_tensors;
383+
384+
// ANE model path for iOS
385+
std::string ane_model_path;
383386

384387
clip_ctx(clip_context_params & ctx_params) {
385388
debug_graph = std::getenv("MTMD_DEBUG_GRAPH") != nullptr;
@@ -3803,15 +3806,27 @@ static std::vector<std::vector<float>> get_2d_sincos_pos_embed(int embed_dim, co
38033806
}
38043807

38053808
#ifdef __APPLE__
3806-
static bool clip_image_encode_ane(float * data, float * vec) {
3809+
static bool clip_image_encode_ane(float * data, float * vec, const char* ane_model_path) {
38073810

38083811
static int flag = 0;
38093812
static const void* coremlEncoder = NULL;
3810-
if (flag == 0) {
3811-
coremlEncoder = loadModel();
3813+
static std::string cached_model_path = "";
3814+
3815+
// Check if we need to load a new model
3816+
if (flag == 0 || (ane_model_path && cached_model_path != ane_model_path)) {
3817+
if (coremlEncoder) {
3818+
closeModel(coremlEncoder);
3819+
}
3820+
coremlEncoder = loadModel(ane_model_path);
3821+
if (!coremlEncoder) {
3822+
printf("Failed to load ANE model from: %s\n", ane_model_path ? ane_model_path : "null");
3823+
return false;
3824+
}
3825+
cached_model_path = ane_model_path ? ane_model_path : "";
38123826
flag = 1;
38133827
}
38143828
predictWith(coremlEncoder, data, vec);
3829+
return true;
38153830
}
38163831
#endif
38173832

@@ -3829,7 +3844,7 @@ bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f3
38293844
float * vit_embedding2 = (float *)malloc(1100*1152*sizeof(float));
38303845

38313846
ane_embedding(ctx, n_threads, &imgs, vit_embedding1);
3832-
clip_image_encode_ane(vit_embedding1, vit_embedding2);
3847+
clip_image_encode_ane(vit_embedding1, vit_embedding2, ctx->ane_model_path.c_str());
38333848
ane_resampler(ctx, n_threads, &imgs, vit_embedding2, vec);
38343849
free(vit_embedding1);
38353850
free(vit_embedding2);
@@ -4634,3 +4649,9 @@ void clip_image_f32_batch_add_mel(struct clip_image_f32_batch * batch, int n_mel
46344649
batch->entries.push_back(clip_image_f32_ptr(audio));
46354650
batch->is_audio = true;
46364651
}
4652+
4653+
void clip_set_ane_model_path(struct clip_ctx * ctx, const char * ane_model_path) {
4654+
if (ctx && ane_model_path) {
4655+
ctx->ane_model_path = ane_model_path;
4656+
}
4657+
}

tools/mtmd/clip.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,6 @@ void clip_image_f32_batch_add_mel(struct clip_image_f32_batch * batch, int n_mel
112112
bool clip_has_vision_encoder(const struct clip_ctx * ctx);
113113
bool clip_has_audio_encoder(const struct clip_ctx * ctx);
114114
bool clip_has_whisper_encoder(const struct clip_ctx * ctx);
115+
116+
// ANE support functions
117+
void clip_set_ane_model_path(struct clip_ctx * ctx, const char * ane_model_path);

tools/mtmd/mtmd-cli.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ struct mtmd_cli_context {
132132
mparams.print_timings = true;
133133
mparams.n_threads = params.cpuparams.n_threads;
134134
mparams.verbosity = params.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO;
135+
mparams.ane_model_path = params.ane.path.empty() ? nullptr : params.ane.path.c_str();
135136
ctx_vision.reset(mtmd_init_from_file(clip_path, model, mparams));
136137
if (!ctx_vision.get()) {
137138
LOG_ERR("Failed to load vision model from %s\n", clip_path);

tools/mtmd/mtmd-ios.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,14 @@ mtmd_ios_params mtmd_ios_params_default(void) {
6161
mtmd_ios_params params = {};
6262
params.model_path = "";
6363
params.mmproj_path = "";
64+
params.ane_path = "";
6465
params.n_predict = -1;
6566
params.n_ctx = 4096;
6667
params.n_threads = 4;
6768
params.temperature = 0.2f;
6869
params.use_gpu = true;
70+
params.mmproj_use_gpu = true;
71+
params.warmup = true;
6972
return params;
7073
}
7174

@@ -86,6 +89,7 @@ mtmd_ios_context* mtmd_ios_init(const mtmd_ios_params* params) {
8689
common_params common_params;
8790
common_params.model.path = params->model_path;
8891
common_params.mmproj.path = params->mmproj_path;
92+
common_params.ane.path = params->ane_path;
8993
common_params.n_ctx = params->n_ctx;
9094
common_params.n_batch = 2048; // 增加batch大小,与标准mtmd保持一致
9195
common_params.cpuparams.n_threads = params->n_threads;
@@ -132,6 +136,7 @@ mtmd_ios_context* mtmd_ios_init(const mtmd_ios_params* params) {
132136
mparams.print_timings = false;
133137
mparams.n_threads = params->n_threads;
134138
mparams.verbosity = GGML_LOG_LEVEL_INFO;
139+
mparams.ane_model_path = params->ane_path.empty() ? nullptr : params->ane_path.c_str();
135140

136141
ctx->ctx_vision.reset(mtmd_init_from_file(params->mmproj_path.c_str(), ctx->model, mparams));
137142
if (!ctx->ctx_vision.get()) {

tools/mtmd/mtmd-ios.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ typedef struct mtmd_ios_context mtmd_ios_context;
1515
typedef struct mtmd_ios_params {
1616
std::string model_path;
1717
std::string mmproj_path;
18+
std::string ane_path;
1819
int n_predict;
1920
int n_ctx;
2021
int n_threads;

0 commit comments

Comments
 (0)