Skip to content

Commit fd64e45

Browse files
committed
Replace ane with coreml; replace malloc with std::vector<float>
1 parent 9eee52c commit fd64e45

File tree

12 files changed

+79
-86
lines changed

12 files changed

+79
-86
lines changed

common/arg.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
961961
for (auto & ex : mmproj_examples) {
962962
if (ctx_arg.ex == ex) {
963963
common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
964-
common_params_handle_model(params.ane, params.hf_token, "", params.offline);
964+
common_params_handle_model(params.coreml, params.hf_token, "", params.offline);
965965
break;
966966
}
967967
}
@@ -2264,13 +2264,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
22642264
params.mmproj_use_gpu = false;
22652265
}
22662266
).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
2267+
// CoreML model path (new)
22672268
add_opt(common_arg(
2268-
{"--ane"}, "FILE",
2269-
"path to Apple Neural Engine model file for iOS",
2269+
{"--coreml"}, "FILE",
2270+
"path to CoreML model file",
22702271
[](common_params & params, const std::string & value) {
2271-
params.ane.path = value;
2272+
params.coreml.path = value;
22722273
}
2273-
).set_examples(mmproj_examples).set_env("LLAMA_ARG_ANE"));
2274+
).set_examples(mmproj_examples).set_env("LLAMA_ARG_COREML"));
2275+
22742276
add_opt(common_arg(
22752277
{"--image", "--audio"}, "FILE",
22762278
"path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ struct common_params {
377377
std::vector<std::string> image; // path to image file(s)
378378

379379
// CoreML model support (formerly Apple Neural Engine)
380-
struct common_params_model ane;
380+
struct common_params_model coreml;
381381

382382
// embedding
383383
bool embedding = false; // get only sentence embedding

tools/mtmd/CMakeLists.txt

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
find_package(Threads REQUIRED)
44

5-
# ANE support option
6-
option(ENABLE_ANE "Enable Apple Neural Engine support" OFF)
5+
# CoreML support option
6+
option(ENABLE_COREML "Enable CoreML support" OFF)
77

88
add_library(mtmd
99
mtmd.cpp
@@ -16,20 +16,20 @@ add_library(mtmd
1616
mtmd-helper.h
1717
)
1818

19-
# Add ANE related files when enabled
20-
if(ENABLE_ANE)
19+
# Add CoreML related files when enabled
20+
if(ENABLE_COREML)
2121
target_sources(mtmd PRIVATE
22-
ane/ane.h
23-
ane/ane.mm
24-
ane/ane_minicpmv4_vit_f16.h
25-
ane/ane_minicpmv4_vit_f16.m
22+
coreml/mtmd_coreml.h
23+
coreml/mtmd_coreml.mm
24+
coreml/ane_minicpmv4_vit_f16.h
25+
coreml/ane_minicpmv4_vit_f16.m
2626
)
2727
# Define compile-time macro for code guards
28-
target_compile_definitions(mtmd PRIVATE ENABLE_ANE)
28+
target_compile_definitions(mtmd PRIVATE ENABLE_COREML)
2929

3030
# Enable ARC for Objective-C files
31-
set_source_files_properties(ane/ane.mm PROPERTIES COMPILE_FLAGS "-fobjc-arc")
32-
set_source_files_properties(ane/ane_minicpmv4_vit_f16.m PROPERTIES COMPILE_FLAGS "-fobjc-arc")
31+
set_source_files_properties(coreml/mtmd_coreml.mm PROPERTIES COMPILE_FLAGS "-fobjc-arc")
32+
set_source_files_properties(coreml/ane_minicpmv4_vit_f16.m PROPERTIES COMPILE_FLAGS "-fobjc-arc")
3333
endif()
3434

3535
target_link_libraries (mtmd PUBLIC ggml llama common)
@@ -43,8 +43,8 @@ target_include_directories(mtmd PRIVATE ../../src)
4343
target_include_directories(mtmd PRIVATE ../../vendor)
4444
target_compile_features (mtmd PRIVATE cxx_std_17)
4545

46-
# Link CoreML and Accelerate frameworks when ANE is enabled
47-
if(ENABLE_ANE)
46+
# Link CoreML and Accelerate frameworks when CoreML is enabled
47+
if(ENABLE_COREML)
4848
target_link_libraries(mtmd PRIVATE
4949
"-framework Foundation"
5050
"-framework CoreML"
@@ -64,10 +64,10 @@ set(MTMD_PUBLIC_HEADERS
6464
${CMAKE_CURRENT_SOURCE_DIR}/mtmd-helper.h
6565
)
6666

67-
# Add ANE public headers when enabled
68-
if(ENABLE_ANE)
67+
# Add CoreML public headers when enabled
68+
if(ENABLE_COREML)
6969
list(APPEND MTMD_PUBLIC_HEADERS
70-
${CMAKE_CURRENT_SOURCE_DIR}/ane/ane.h
70+
${CMAKE_CURRENT_SOURCE_DIR}/coreml/mtmd_coreml.h # same header that target_sources() adds for the CoreML build
7171
)
7272
endif()
7373

tools/mtmd/clip.cpp

Lines changed: 34 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
#include "ggml-alloc.h"
1111
#include "ggml-backend.h"
1212
#include "gguf.h"
13-
#if defined(ENABLE_ANE)
14-
#include "ane/ane.h"
13+
#if defined(ENABLE_COREML)
14+
#include "coreml/mtmd_coreml.h"
1515
#endif
1616

1717
#include <cassert>
@@ -392,8 +392,8 @@ struct clip_ctx {
392392
bool debug_graph = false;
393393
std::vector<ggml_tensor *> debug_print_tensors;
394394

395-
// ANE model path for iOS
396-
std::string ane_model_path;
395+
// CoreML model path for iOS
396+
std::string coreml_model_path;
397397

398398
clip_ctx(clip_context_params & ctx_params) {
399399
debug_graph = std::getenv("MTMD_DEBUG_GRAPH") != nullptr;
@@ -914,8 +914,6 @@ struct clip_graph {
914914
}
915915

916916
ggml_cgraph * build_minicpmv_embedding() {
917-
const int batch_size = 1;
918-
919917
GGML_ASSERT(model.class_embedding == nullptr);
920918
const int n_pos = n_patches;
921919

@@ -3840,24 +3838,28 @@ static std::vector<std::vector<float>> get_2d_sincos_pos_embed(int embed_dim, co
38403838
return pos_embed_2d;
38413839
}
38423840

3843-
#if defined(ENABLE_ANE)
3844-
static bool clip_image_encode_ane(float * data, float * vec, const char* ane_model_path) {
3841+
#if defined(ENABLE_COREML)
3842+
// forward declarations
3843+
static bool coreml_embedding(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, float * vec);
3844+
static bool coreml_resampler(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, const float * vit_embedding, float * vec);
3845+
3846+
static bool clip_image_encode_coreml(float * data, float * vec, const char* coreml_model_path) {
38453847

38463848
static int flag = 0;
38473849
static const void* coremlEncoder = NULL;
38483850
static std::string cached_model_path = "";
38493851

38503852
// Check if we need to load a new model
3851-
if (flag == 0 || (ane_model_path && cached_model_path != ane_model_path)) {
3853+
if (flag == 0 || (coreml_model_path && cached_model_path != coreml_model_path)) {
38523854
if (coremlEncoder) {
38533855
closeModel(coremlEncoder);
38543856
}
3855-
coremlEncoder = loadModel(ane_model_path);
3857+
coremlEncoder = loadModel(coreml_model_path);
38563858
if (!coremlEncoder) {
3857-
printf("Failed to load ANE model from: %s\n", ane_model_path ? ane_model_path : "null");
3859+
printf("Failed to load CoreML model from: %s\n", coreml_model_path ? coreml_model_path : "null");
38583860
return false;
38593861
}
3860-
cached_model_path = ane_model_path ? ane_model_path : "";
3862+
cached_model_path = coreml_model_path ? coreml_model_path : "";
38613863
flag = 1;
38623864
}
38633865
predictWith(coremlEncoder, data, vec);
@@ -3871,27 +3873,30 @@ bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f3
38713873
*img_copy = *img;
38723874
imgs.entries.push_back(std::move(img_copy));
38733875

3874-
#if defined(ENABLE_ANE)
3876+
#if defined(ENABLE_COREML)
38753877
bool ios_ctx = true;
38763878
if (ios_ctx){
3877-
printf("clip use ane\n");
3878-
float * vit_embedding1 = (float *)malloc(1100*1152*sizeof(float));
3879-
float * vit_embedding2 = (float *)malloc(1100*1152*sizeof(float));
3880-
3881-
ane_embedding(ctx, n_threads, &imgs, vit_embedding1);
3882-
clip_image_encode_ane(vit_embedding1, vit_embedding2, ctx->ane_model_path.c_str());
3883-
ane_resampler(ctx, n_threads, &imgs, vit_embedding2, vec);
3884-
free(vit_embedding1);
3885-
free(vit_embedding2);
3879+
printf("clip use coreml\n");
3880+
std::vector<float> vit_embedding1(1100*1152);
3881+
std::vector<float> vit_embedding2(1100*1152);
3882+
3883+
// call CoreML pipeline: embedding -> encoder -> resampler
3884+
if (!coreml_embedding(ctx, n_threads, &imgs, vit_embedding1.data())) {
3885+
return false;
3886+
}
3887+
clip_image_encode_coreml(vit_embedding1.data(), vit_embedding2.data(), ctx->coreml_model_path.c_str());
3888+
if (!coreml_resampler(ctx, n_threads, &imgs, vit_embedding2.data(), vec)) {
3889+
return false;
3890+
}
38863891
return true;
38873892
}
38883893
#endif
38893894

38903895
return clip_image_batch_encode(ctx, n_threads, &imgs, vec);
38913896
}
38923897

3893-
#if defined(ENABLE_ANE)
3894-
static bool ane_embedding(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, float * vec) {
3898+
#if defined(ENABLE_COREML)
3899+
static bool coreml_embedding(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, float * vec) {
38953900
const clip_image_f32_batch & imgs = *imgs_c_ptr;
38963901
int batch_size = imgs.entries.size();
38973902

@@ -3908,7 +3913,7 @@ static bool ane_embedding(clip_ctx * ctx, const int n_threads, const clip_image_
39083913
clip_graph graph(ctx, *imgs.entries[0]);
39093914
ggml_cgraph * gf;
39103915
gf = graph.build_minicpmv_embedding();
3911-
ggml_backend_sched_alloc_graph(ctx->sched.get(), gf);
3916+
ggml_backend_sched_alloc_graph(ctx->sched.get(), gf);
39123917

39133918
// set inputs
39143919
const auto & model = ctx->model;
@@ -3918,8 +3923,6 @@ static bool ane_embedding(clip_ctx * ctx, const int n_threads, const clip_image_
39183923
const int image_size_height = imgs.entries[0]->ny;
39193924

39203925
const int patch_size = hparams.patch_size;
3921-
const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size));
3922-
const int n_pos = num_patches + (model.class_embedding ? 1 : 0);
39233926
const int pos_w = image_size_width / patch_size;
39243927
const int pos_h = image_size_height / patch_size;
39253928

@@ -4054,16 +4057,13 @@ static bool ane_embedding(clip_ctx * ctx, const int n_threads, const clip_image_
40544057
// the last node is the embedding tensor
40554058
ggml_tensor * embeddings = ggml_graph_node(gf, -1);
40564059

4057-
// sanity check (only support batch size of 1 for now)
4058-
const int n_tokens_out = embeddings->ne[1];
4059-
40604060
// copy the embeddings to the location passed by the user
40614061
ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));
40624062

40634063
return true;
40644064
}
40654065

4066-
static bool ane_resampler(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, const float * vit_embedding, float * vec) {
4066+
static bool coreml_resampler(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, const float * vit_embedding, float * vec) {
40674067
const clip_image_f32_batch & imgs = *imgs_c_ptr;
40684068
int batch_size = imgs.entries.size();
40694069

@@ -4090,8 +4090,6 @@ static bool ane_resampler(clip_ctx * ctx, const int n_threads, const clip_image_
40904090
const int image_size_height = imgs.entries[0]->ny;
40914091

40924092
const int patch_size = hparams.patch_size;
4093-
const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size));
4094-
const int n_pos = num_patches + (model.class_embedding ? 1 : 0);
40954093
const int pos_w = image_size_width / patch_size;
40964094
const int pos_h = image_size_height / patch_size;
40974095

@@ -4113,13 +4111,6 @@ static bool ane_resampler(clip_ctx * ctx, const int n_threads, const clip_image_
41134111
ggml_backend_tensor_set(cur, values.data(), 0, ggml_nbytes(cur));
41144112
};
41154113

4116-
auto set_input_i32 = [&get_inp_tensor](const char * name, std::vector<int32_t> & values) {
4117-
ggml_tensor * cur = get_inp_tensor(name);
4118-
GGML_ASSERT(cur->type == GGML_TYPE_I32);
4119-
GGML_ASSERT(ggml_nelements(cur) == (int64_t)values.size());
4120-
ggml_backend_tensor_set(cur, values.data(), 0, ggml_nbytes(cur));
4121-
};
4122-
41234114
{
41244115
struct ggml_tensor * embeddings = ggml_graph_get_tensor(gf, "embeddings");
41254116
ggml_backend_tensor_set(embeddings, vit_embedding, 0, ggml_nbytes(embeddings));
@@ -4674,8 +4665,8 @@ void clip_image_f32_batch_add_mel(struct clip_image_f32_batch * batch, int n_mel
46744665
batch->is_audio = true;
46754666
}
46764667

4677-
void clip_set_ane_model_path(struct clip_ctx * ctx, const char * ane_model_path) {
4678-
if (ctx && ane_model_path) {
4679-
ctx->ane_model_path = ane_model_path;
4668+
void clip_set_coreml_model_path(struct clip_ctx * ctx, const char * coreml_model_path) {
4669+
if (ctx && coreml_model_path) {
4670+
ctx->coreml_model_path = coreml_model_path;
46804671
}
46814672
}

tools/mtmd/clip.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,5 +110,5 @@ bool clip_has_vision_encoder(const struct clip_ctx * ctx);
110110
bool clip_has_audio_encoder(const struct clip_ctx * ctx);
111111
bool clip_has_whisper_encoder(const struct clip_ctx * ctx);
112112

113-
// ANE support functions
114-
void clip_set_ane_model_path(struct clip_ctx * ctx, const char * ane_model_path);
113+
// CoreML support functions
114+
void clip_set_coreml_model_path(struct clip_ctx * ctx, const char * coreml_model_path);
File renamed without changes.
File renamed without changes.
File renamed without changes.

tools/mtmd/ane/ane.mm renamed to tools/mtmd/coreml/mtmd_coreml.mm

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#import <CoreML/CoreML.h>
22
#import <Accelerate/Accelerate.h>
3-
#import "ane.h"
3+
#import "mtmd_coreml.h"
44
#import "ane_minicpmv4_vit_f16.h"
55
#include <stdlib.h>
66

@@ -19,36 +19,36 @@
1919
// Check if file exists
2020
NSFileManager *fileManager = [NSFileManager defaultManager];
2121
if (![fileManager fileExistsAtPath:pathString]) {
22-
NSLog(@"Error: ANE model file does not exist at path: %@", pathString);
22+
NSLog(@"Error: CoreML model file does not exist at path: %@", pathString);
2323
return nullptr;
2424
}
2525

2626
// Check if it's a directory (for .mlmodelc packages)
2727
BOOL isDirectory;
2828
if ([fileManager fileExistsAtPath:pathString isDirectory:&isDirectory]) {
2929
if (!isDirectory && ![pathString hasSuffix:@".mlmodelc"]) {
30-
NSLog(@"Warning: ANE model path should typically be a .mlmodelc directory: %@", pathString);
30+
NSLog(@"Warning: CoreML model path should typically be a .mlmodelc directory: %@", pathString);
3131
}
3232
}
3333

3434
NSURL *modelURL = [NSURL fileURLWithPath:pathString];
3535

36-
NSLog(@"Loading ANE model from: %@", modelURL.absoluteString);
36+
NSLog(@"Loading CoreML model from: %@", modelURL.absoluteString);
3737

3838
NSError *error = nil;
3939
const void* model = CFBridgingRetain([[ane_minicpmv4_vit_f16 alloc] initWithContentsOfURL:modelURL error:&error]);
4040

4141
if (error) {
42-
NSLog(@"Error loading ANE model: %@", error.localizedDescription);
42+
NSLog(@"Error loading CoreML model: %@", error.localizedDescription);
4343
return nullptr;
4444
}
4545

4646
if (!model) {
47-
NSLog(@"Error: Failed to create ANE model instance");
47+
NSLog(@"Error: Failed to create CoreML model instance");
4848
return nullptr;
4949
}
5050

51-
NSLog(@"Successfully loaded ANE model from: %@", pathString);
51+
NSLog(@"Successfully loaded CoreML model from: %@", pathString);
5252
return model;
5353
}
5454

tools/mtmd/mtmd-cli.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ struct mtmd_cli_context {
132132
mparams.print_timings = true;
133133
mparams.n_threads = params.cpuparams.n_threads;
134134
mparams.verbosity = params.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO;
135-
mparams.ane_model_path = params.ane.path.empty() ? nullptr : params.ane.path.c_str();
135+
mparams.coreml_model_path = params.coreml.path.empty() ? nullptr : params.coreml.path.c_str();
136136
ctx_vision.reset(mtmd_init_from_file(clip_path, model, mparams));
137137
if (!ctx_vision.get()) {
138138
LOG_ERR("Failed to load vision model from %s\n", clip_path);

0 commit comments

Comments
 (0)