Skip to content

Commit 1a52c75

Browse files
committed
vit+resampler to coreml
Signed-off-by: tc-mb <[email protected]>
1 parent fd64e45 commit 1a52c75

File tree

8 files changed

+230
-405
lines changed

8 files changed

+230
-405
lines changed

tools/mtmd/CMakeLists.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ if(ENABLE_COREML)
2121
target_sources(mtmd PRIVATE
2222
coreml/mtmd_coreml.h
2323
coreml/mtmd_coreml.mm
24-
coreml/ane_minicpmv4_vit_f16.h
25-
coreml/ane_minicpmv4_vit_f16.m
24+
coreml/coreml_minicpmv40_vit_f16.h
25+
coreml/coreml_minicpmv40_vit_f16.m
2626
)
2727
# Define compile-time macro for code guards
2828
target_compile_definitions(mtmd PRIVATE ENABLE_COREML)
29-
29+
3030
# Enable ARC for Objective-C files
3131
set_source_files_properties(coreml/mtmd_coreml.mm PROPERTIES COMPILE_FLAGS "-fobjc-arc")
32-
set_source_files_properties(coreml/ane_minicpmv4_vit_f16.m PROPERTIES COMPILE_FLAGS "-fobjc-arc")
32+
set_source_files_properties(coreml/coreml_minicpmv40_vit_f16.m PROPERTIES COMPILE_FLAGS "-fobjc-arc")
3333
endif()
3434

3535
target_link_libraries (mtmd PUBLIC ggml llama common)
@@ -45,9 +45,9 @@ target_compile_features (mtmd PRIVATE cxx_std_17)
4545

4646
# Link CoreML and Accelerate frameworks when CoreML is enabled
4747
if(ENABLE_COREML)
48-
target_link_libraries(mtmd PRIVATE
49-
"-framework Foundation"
50-
"-framework CoreML"
48+
target_link_libraries(mtmd PRIVATE
49+
"-framework Foundation"
50+
"-framework CoreML"
5151
"-framework Accelerate"
5252
"-ObjC"
5353
)

tools/mtmd/clip.cpp

Lines changed: 81 additions & 293 deletions
Large diffs are not rendered by default.

tools/mtmd/clip.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);
9494

9595
bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec);
9696
bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
97+
bool clip_image_batch_encode_coreml(struct clip_ctx * ctx, const struct clip_image_f32_batch * imgs, float * vec);
9798

9899
int clip_is_minicpmv(const struct clip_ctx * ctx);
99100
bool clip_is_glm(const struct clip_ctx * ctx);
tools/mtmd/coreml/coreml_minicpmv40_vit_f16.h

Lines changed: 46 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// ane_minicpmv4_vit_f16.h
2+
// coreml_minicpmv40_vit_f16.h
33
//
44
// This file was automatically generated and should not be edited.
55
//
@@ -13,20 +13,26 @@ NS_ASSUME_NONNULL_BEGIN
1313

1414
/// Model Prediction Input Type
1515
API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
16-
@interface ane_minicpmv4_vit_f16Input : NSObject<MLFeatureProvider>
16+
@interface coreml_minicpmv40_vit_f16Input : NSObject<MLFeatureProvider>
1717

18-
/// input as 1 × 1024 × 1152 3-dimensional array of floats
19-
@property (readwrite, nonatomic, strong) MLMultiArray * input;
18+
/// pixel_values as 1 × 3 × 14 × 14336 4-dimensional array of floats
19+
@property (readwrite, nonatomic, strong) MLMultiArray * pixel_values;
20+
21+
/// position_ids as 1 by 1024 matrix of 32-bit integers
22+
@property (readwrite, nonatomic, strong) MLMultiArray * position_ids;
23+
24+
/// pos_embed as 1024 × 1 × 2560 3-dimensional array of floats
25+
@property (readwrite, nonatomic, strong) MLMultiArray * pos_embed;
2026
- (instancetype)init NS_UNAVAILABLE;
21-
- (instancetype)initWithInput:(MLMultiArray *)input NS_DESIGNATED_INITIALIZER;
27+
- (instancetype)initWithPixel_values:(MLMultiArray *)pixel_values position_ids:(MLMultiArray *)position_ids pos_embed:(MLMultiArray *)pos_embed NS_DESIGNATED_INITIALIZER;
2228

2329
@end
2430

2531
/// Model Prediction Output Type
2632
API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
27-
@interface ane_minicpmv4_vit_f16Output : NSObject<MLFeatureProvider>
33+
@interface coreml_minicpmv40_vit_f16Output : NSObject<MLFeatureProvider>
2834

29-
/// output as 1 × 1024 × 1152 3-dimensional array of floats
35+
/// output as 1 × 64 × 2560 3-dimensional array of floats
3036
@property (readwrite, nonatomic, strong) MLMultiArray * output;
3137
- (instancetype)init NS_UNAVAILABLE;
3238
- (instancetype)initWithOutput:(MLMultiArray *)output NS_DESIGNATED_INITIALIZER;
@@ -35,7 +41,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
3541

3642
/// Class for model loading and prediction
3743
API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
38-
@interface ane_minicpmv4_vit_f16 : NSObject
44+
@interface coreml_minicpmv40_vit_f16 : NSObject
3945
@property (readonly, nonatomic, nullable) MLModel * model;
4046

4147
/**
@@ -44,111 +50,113 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
4450
+ (nullable NSURL *)URLOfModelInThisBundle;
4551

4652
/**
47-
Initialize ane_minicpmv4_vit_f16 instance from an existing MLModel object.
53+
Initialize coreml_minicpmv40_vit_f16 instance from an existing MLModel object.
4854
49-
Usually the application does not use this initializer unless it makes a subclass of ane_minicpmv4_vit_f16.
55+
Usually the application does not use this initializer unless it makes a subclass of coreml_minicpmv40_vit_f16.
5056
Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
5157
*/
5258
- (instancetype)initWithMLModel:(MLModel *)model NS_DESIGNATED_INITIALIZER;
5359

5460
/**
55-
Initialize ane_minicpmv4_vit_f16 instance with the model in this bundle.
61+
Initialize coreml_minicpmv40_vit_f16 instance with the model in this bundle.
5662
*/
5763
- (nullable instancetype)init;
5864

5965
/**
60-
Initialize ane_minicpmv4_vit_f16 instance with the model in this bundle.
66+
Initialize coreml_minicpmv40_vit_f16 instance with the model in this bundle.
6167
6268
@param configuration The model configuration object
6369
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
6470
*/
6571
- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
6672

6773
/**
68-
Initialize ane_minicpmv4_vit_f16 instance from the model URL.
74+
Initialize coreml_minicpmv40_vit_f16 instance from the model URL.
6975
70-
@param modelURL URL to the .mlmodelc directory for ane_minicpmv4_vit_f16.
76+
@param modelURL URL to the .mlmodelc directory for coreml_minicpmv40_vit_f16.
7177
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
7278
*/
7379
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error;
7480

7581
/**
76-
Initialize ane_minicpmv4_vit_f16 instance from the model URL.
82+
Initialize coreml_minicpmv40_vit_f16 instance from the model URL.
7783
78-
@param modelURL URL to the .mlmodelc directory for ane_minicpmv4_vit_f16.
84+
@param modelURL URL to the .mlmodelc directory for coreml_minicpmv40_vit_f16.
7985
@param configuration The model configuration object
8086
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
8187
*/
8288
- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
8389

8490
/**
85-
Construct ane_minicpmv4_vit_f16 instance asynchronously with configuration.
91+
Construct coreml_minicpmv40_vit_f16 instance asynchronously with configuration.
8692
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
8793
8894
@param configuration The model configuration
89-
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid ane_minicpmv4_vit_f16 instance or NSError object.
95+
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid coreml_minicpmv40_vit_f16 instance or NSError object.
9096
*/
91-
+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(ane_minicpmv4_vit_f16 * _Nullable model, NSError * _Nullable error))handler;
97+
+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(coreml_minicpmv40_vit_f16 * _Nullable model, NSError * _Nullable error))handler;
9298

9399
/**
94-
Construct ane_minicpmv4_vit_f16 instance asynchronously with URL of .mlmodelc directory and optional configuration.
100+
Construct coreml_minicpmv40_vit_f16 instance asynchronously with URL of .mlmodelc directory and optional configuration.
95101
96102
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
97103
98104
@param modelURL The model URL.
99105
@param configuration The model configuration
100-
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid ane_minicpmv4_vit_f16 instance or NSError object.
106+
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid coreml_minicpmv40_vit_f16 instance or NSError object.
101107
*/
102-
+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(ane_minicpmv4_vit_f16 * _Nullable model, NSError * _Nullable error))handler;
108+
+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(coreml_minicpmv40_vit_f16 * _Nullable model, NSError * _Nullable error))handler;
103109

104110
/**
105111
Make a prediction using the standard interface
106-
@param input an instance of ane_minicpmv4_vit_f16Input to predict from
112+
@param input an instance of coreml_minicpmv40_vit_f16Input to predict from
107113
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
108-
@return the prediction as ane_minicpmv4_vit_f16Output
114+
@return the prediction as coreml_minicpmv40_vit_f16Output
109115
*/
110-
- (nullable ane_minicpmv4_vit_f16Output *)predictionFromFeatures:(ane_minicpmv4_vit_f16Input *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
116+
- (nullable coreml_minicpmv40_vit_f16Output *)predictionFromFeatures:(coreml_minicpmv40_vit_f16Input *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
111117

112118
/**
113119
Make a prediction using the standard interface
114-
@param input an instance of ane_minicpmv4_vit_f16Input to predict from
120+
@param input an instance of coreml_minicpmv40_vit_f16Input to predict from
115121
@param options prediction options
116122
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
117-
@return the prediction as ane_minicpmv4_vit_f16Output
123+
@return the prediction as coreml_minicpmv40_vit_f16Output
118124
*/
119-
- (nullable ane_minicpmv4_vit_f16Output *)predictionFromFeatures:(ane_minicpmv4_vit_f16Input *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
125+
- (nullable coreml_minicpmv40_vit_f16Output *)predictionFromFeatures:(coreml_minicpmv40_vit_f16Input *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
120126

121127
/**
122128
Make an asynchronous prediction using the standard interface
123-
@param input an instance of ane_minicpmv4_vit_f16Input to predict from
129+
@param input an instance of coreml_minicpmv40_vit_f16Input to predict from
124130
@param completionHandler a block that will be called upon completion of the prediction. error will be nil if no error occurred.
125131
*/
126-
- (void)predictionFromFeatures:(ane_minicpmv4_vit_f16Input *)input completionHandler:(void (^)(ane_minicpmv4_vit_f16Output * _Nullable output, NSError * _Nullable error))completionHandler API_AVAILABLE(macos(14.0), ios(17.0), watchos(10.0), tvos(17.0)) __attribute__((visibility("hidden")));
132+
- (void)predictionFromFeatures:(coreml_minicpmv40_vit_f16Input *)input completionHandler:(void (^)(coreml_minicpmv40_vit_f16Output * _Nullable output, NSError * _Nullable error))completionHandler API_AVAILABLE(macos(14.0), ios(17.0), watchos(10.0), tvos(17.0)) __attribute__((visibility("hidden")));
127133

128134
/**
129135
Make an asynchronous prediction using the standard interface
130-
@param input an instance of ane_minicpmv4_vit_f16Input to predict from
136+
@param input an instance of coreml_minicpmv40_vit_f16Input to predict from
131137
@param options prediction options
132138
@param completionHandler a block that will be called upon completion of the prediction. error will be nil if no error occurred.
133139
*/
134-
- (void)predictionFromFeatures:(ane_minicpmv4_vit_f16Input *)input options:(MLPredictionOptions *)options completionHandler:(void (^)(ane_minicpmv4_vit_f16Output * _Nullable output, NSError * _Nullable error))completionHandler API_AVAILABLE(macos(14.0), ios(17.0), watchos(10.0), tvos(17.0)) __attribute__((visibility("hidden")));
140+
- (void)predictionFromFeatures:(coreml_minicpmv40_vit_f16Input *)input options:(MLPredictionOptions *)options completionHandler:(void (^)(coreml_minicpmv40_vit_f16Output * _Nullable output, NSError * _Nullable error))completionHandler API_AVAILABLE(macos(14.0), ios(17.0), watchos(10.0), tvos(17.0)) __attribute__((visibility("hidden")));
135141

136142
/**
137143
Make a prediction using the convenience interface
138-
@param input 1 × 1024 × 1152 3-dimensional array of floats
144+
@param pixel_values 1 × 3 × 14 × 14336 4-dimensional array of floats
145+
@param position_ids 1 by 1024 matrix of 32-bit integers
146+
@param pos_embed 1024 × 1 × 2560 3-dimensional array of floats
139147
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
140-
@return the prediction as ane_minicpmv4_vit_f16Output
148+
@return the prediction as coreml_minicpmv40_vit_f16Output
141149
*/
142-
- (nullable ane_minicpmv4_vit_f16Output *)predictionFromInput:(MLMultiArray *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
150+
- (nullable coreml_minicpmv40_vit_f16Output *)predictionFromPixel_values:(MLMultiArray *)pixel_values position_ids:(MLMultiArray *)position_ids pos_embed:(MLMultiArray *)pos_embed error:(NSError * _Nullable __autoreleasing * _Nullable)error;
143151

144152
/**
145153
Batch prediction
146-
@param inputArray array of ane_minicpmv4_vit_f16Input instances to obtain predictions from
154+
@param inputArray array of coreml_minicpmv40_vit_f16Input instances to obtain predictions from
147155
@param options prediction options
148156
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
149-
@return the predictions as NSArray<ane_minicpmv4_vit_f16Output *>
157+
@return the predictions as NSArray<coreml_minicpmv40_vit_f16Output *>
150158
*/
151-
- (nullable NSArray<ane_minicpmv4_vit_f16Output *> *)predictionsFromInputs:(NSArray<ane_minicpmv4_vit_f16Input*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
159+
- (nullable NSArray<coreml_minicpmv40_vit_f16Output *> *)predictionsFromInputs:(NSArray<coreml_minicpmv40_vit_f16Input*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
152160
@end
153161

154162
NS_ASSUME_NONNULL_END

0 commit comments

Comments
 (0)