Skip to content

Commit c2e5dd5

Browse files
committed
metal : refactor device
ggml-ci
1 parent 593478f commit c2e5dd5

File tree

5 files changed

+519
-510
lines changed

5 files changed

+519
-510
lines changed

ggml/src/ggml-metal/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ message(STATUS "Metal framework found")
66

77
# Sources passed to ggml_add_backend_library for the ggml-metal target.
# ggml-metal-device.m and ggml-metal-device.cpp are the two files added by this change.
ggml_add_backend_library(ggml-metal
88
ggml-metal.m
9+
ggml-metal-device.m
10+
ggml-metal-device.cpp
911
ggml-metal-common.cpp
1012
)
1113

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include "ggml-metal-device.h"
2+
3+
#include <memory>
4+
5+
struct ggml_backend_metal_device_deleter {
6+
void operator()(ggml_backend_metal_device_t ctx) {
7+
ggml_backend_metal_device_free(ctx);
8+
}
9+
};
10+
11+
// Owning pointer to a Metal device context; the context is released through ggml_backend_metal_device_deleter.
using ggml_backend_metal_device_ptr = std::unique_ptr<ggml_backend_metal_device, ggml_backend_metal_device_deleter>;
12+
13+
// Return the process-wide Metal device context.
// The context is created by ggml_backend_metal_device_init() the first time this function runs
// and is owned by a function-local static smart pointer, whose deleter frees it when static
// objects are destroyed.
ggml_backend_metal_device_t ggml_backend_metal_device_get(void) {
    static ggml_backend_metal_device_ptr instance(ggml_backend_metal_device_init());

    return instance.get();
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#pragma once
2+
3+
#include "ggml.h"
4+
5+
#ifdef __cplusplus
6+
extern "C" {
7+
#endif
8+
9+
// Opaque handle to a Metal device context; the struct itself is not defined in this header.
typedef struct ggml_backend_metal_device * ggml_backend_metal_device_t;
10+
11+
// Properties of a Metal device, returned by value from ggml_backend_metal_device_get_props().
// The per-field notes describe how ggml_backend_metal_device_init() fills them in.
struct ggml_backend_metal_device_props {
12+
// device name as a C string (copied from the MTLDevice name, truncated to fit)
char name[128];
13+
14+
// MTLDevice maxBufferLength
size_t max_buffer_size;
15+
// MTLDevice recommendedMaxWorkingSetSize
size_t max_working_set_size;
16+
// MTLDevice maxThreadgroupMemoryLength
// NOTE: "theadgroup" is a misspelling of "threadgroup"; the field name is part of the interface and is kept as-is
size_t max_theadgroup_memory_size;
17+
18+
// device supports the Apple7 or the Metal3 GPU family
bool has_simdgroup_reduction;
19+
// device supports the Apple7 GPU family
bool has_simdgroup_mm;
20+
// MTLDevice hasUnifiedMemory
bool has_unified_memory;
21+
// device supports the Metal3 or the Apple6 GPU family
bool has_bfloat;
22+
// equal to has_bfloat when built with GGML_METAL_USE_BF16, otherwise false
bool use_bfloat;
23+
// true by default; when built with GGML_METAL_HAS_RESIDENCY_SETS it is false if the
// GGML_METAL_NO_RESIDENCY environment variable is set
bool use_residency_sets;
24+
// equal to has_unified_memory, forced to false if the GGML_METAL_SHARED_BUFFERS_DISABLE
// environment variable is set
bool use_shared_buffers;
25+
26+
// device supports the Apple7 GPU family
bool supports_gpu_family_apple7;
27+
};
28+
29+
// allocate a device context and initialize it from the system default Metal device:
// command queue, device properties and the Metal library
ggml_backend_metal_device_t ggml_backend_metal_device_init(void);
30+
// release the library, the queue and the device held by the context, then free the context
// ctx must not be NULL (checked with assert)
void ggml_backend_metal_device_free(ggml_backend_metal_device_t ctx);
31+
32+
// return a singleton that is automatically destroyed when the program exits
// the singleton is created with ggml_backend_metal_device_init() on the first call
33+
ggml_backend_metal_device_t ggml_backend_metal_device_get(void);
34+
35+
// accessors for the Metal objects stored in the context, returned as untyped pointers
// the stored objects are id<MTLDevice>, id<MTLLibrary> and id<MTLCommandQueue> respectively
// no retain is performed by the accessors
void * ggml_backend_metal_device_get_device (ggml_backend_metal_device_t ctx);
36+
void * ggml_backend_metal_device_get_library(ggml_backend_metal_device_t ctx);
37+
void * ggml_backend_metal_device_get_queue (ggml_backend_metal_device_t ctx);
38+
39+
// report device memory in bytes:
// - total is the device's recommendedMaxWorkingSetSize
// - free is derived from total and the device's currentAllocatedSize
// both are set to 0 when the OS availability check fails
void ggml_backend_metal_device_get_memory(ggml_backend_metal_device_t ctx, size_t * free, size_t * total);
40+
41+
// return a copy of the properties collected when the context was initialized
struct ggml_backend_metal_device_props ggml_backend_metal_device_get_props(ggml_backend_metal_device_t ctx);
42+
43+
#ifdef __cplusplus
44+
}
45+
#endif
Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
#import "ggml-metal-device.h"
2+
3+
#import "ggml-impl.h"
4+
5+
#include <Foundation/Foundation.h>
6+
7+
#include <Metal/Metal.h>
8+
9+
// local stand-in for MTLGPUFamilyMetal3, declared here because the enum value is not available in some environments
10+
static const NSInteger MTLGPUFamilyMetal3_GGML = 5001;
11+
12+
// Device context: the Metal objects created by ggml_backend_metal_device_init() plus the
// properties queried from the device. The object references are released explicitly in
// ggml_backend_metal_device_free().
struct ggml_backend_metal_device {
13+
// system default Metal device (nil if none could be created)
id<MTLDevice> mtl_device;
14+
// Metal library, loaded pre-compiled or compiled from source (nil if loading failed)
id<MTLLibrary> mtl_library;
15+
16+
// a single global queue shared by all Metal backends
17+
// technically not needed for devices with unified memory, but enables discrete GPUs support
18+
// ref: https://github.com/ggml-org/llama.cpp/pull/15906
19+
id<MTLCommandQueue> mtl_queue;
20+
21+
// feature flags and limits, see struct ggml_backend_metal_device_props
struct ggml_backend_metal_device_props props;
22+
};
23+
24+
// Allocate a Metal device context and populate it from the system default Metal device.
//
// Steps, in order:
//   - create the system default MTLDevice and a command queue on it
//   - query the feature flags and limits stored in ctx->props
//   - load the Metal library (embedded source, pre-compiled default.metallib, or ggml-metal.metal source)
//   - log the detected GPU families and the resulting properties
//
// Returns the new context, or NULL only if the context itself cannot be allocated.
// If no Metal device is available, the context is returned zero-initialized (nil handles, props all 0/false).
// If the command queue or the library cannot be created, the failure is logged and the
// corresponding handle stays nil - the context is still returned.
ggml_backend_metal_device_t ggml_backend_metal_device_init(void) {
    ggml_backend_metal_device_t ctx = calloc(1, sizeof(struct ggml_backend_metal_device));
    if (ctx == NULL) {
        // explicit check instead of assert(): assert() is compiled out with NDEBUG
        GGML_LOG_ERROR("%s: error: failed to allocate the device context\n", __func__);
        return NULL;
    }

    ctx->mtl_device = MTLCreateSystemDefaultDevice();
    if (ctx->mtl_device == nil) {
        // no Metal device - the context stays zero-initialized
        return ctx;
    }

    ctx->mtl_queue = [ctx->mtl_device newCommandQueue];
    if (ctx->mtl_queue == nil) {
        GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
    }

    ctx->props.has_simdgroup_reduction  = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
    ctx->props.has_simdgroup_reduction |= [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];

    ctx->props.has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
    ctx->props.has_unified_memory = ctx->mtl_device.hasUnifiedMemory;

    ctx->props.has_bfloat  = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
    ctx->props.has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];

#if defined(GGML_METAL_USE_BF16)
    ctx->props.use_bfloat = ctx->props.has_bfloat;
#else
    ctx->props.use_bfloat = false;
#endif

    // NOTE(review): this stays true when GGML_METAL_HAS_RESIDENCY_SETS is not defined - confirm that is intended
    ctx->props.use_residency_sets = true;
#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
    ctx->props.use_residency_sets = getenv("GGML_METAL_NO_RESIDENCY") == NULL;
#endif

    ctx->props.use_shared_buffers = ctx->props.has_unified_memory;

    if (getenv("GGML_METAL_SHARED_BUFFERS_DISABLE") != NULL) {
        ctx->props.use_shared_buffers = false;
    }

    ctx->props.supports_gpu_family_apple7 = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];

    ctx->props.max_buffer_size            = ctx->mtl_device.maxBufferLength;
    ctx->props.max_working_set_size       = ctx->mtl_device.recommendedMaxWorkingSetSize;
    ctx->props.max_theadgroup_memory_size = ctx->mtl_device.maxThreadgroupMemoryLength;

    {
        // the buffer was zeroed by calloc and at most size-1 bytes are copied, so it stays NUL-terminated
        const char * dev_name = [[ctx->mtl_device name] UTF8String];
        if (dev_name != NULL) {
            strncpy(ctx->props.name, dev_name, sizeof(ctx->props.name) - 1);
        }
    }

    // load library
    //
    // - first check if the library is embedded
    // - then check if the library is in the bundle
    // - if not found, load the source and compile it
    // - if that fails, log the error and leave ctx->mtl_library == nil
    {
        const int64_t t_start = ggml_time_us();

        NSError  * error = nil;
        NSString * src   = nil;

#if GGML_METAL_EMBED_LIBRARY
        GGML_LOG_INFO("%s: using embedded metal library\n", __func__);

        extern const char ggml_metallib_start[];
        extern const char ggml_metallib_end[];

        src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];

#else

#ifdef SWIFT_PACKAGE
        NSBundle * bundle = SWIFTPM_MODULE_BUNDLE;
#else
        NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
#endif

        NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
        if (path_lib == nil) {
            // Try to find the resource in the directory where the current binary is located.
            NSFileManager * fm = [NSFileManager defaultManager];

            NSString * bin_cur = [[NSProcessInfo processInfo] arguments][0];
            NSString * bin_dir = [bin_cur stringByDeletingLastPathComponent];

            NSString * path_lib_default = [NSString pathWithComponents:@[bin_dir, @"default.metallib"]];
            if ([fm isReadableFileAtPath:path_lib_default]) {
                GGML_LOG_INFO("%s: found '%s'\n", __func__, [path_lib_default UTF8String]);

                NSDictionary * atts = [fm attributesOfItemAtPath:path_lib_default error:&error];
                // compare the attribute by value, not by pointer identity (a nil 'atts' yields NO)
                if ([atts[NSFileType] isEqualToString:NSFileTypeSymbolicLink]) {
                    // Optionally, if this is a symlink, try to resolve it.
                    path_lib_default = [fm destinationOfSymbolicLinkAtPath:path_lib_default error:&error];
                    if (path_lib_default && [path_lib_default length] > 0 && ![[path_lib_default substringToIndex:1] isEqualToString:@"/"]) {
                        // It is a relative path, adding the binary directory as directory prefix.
                        path_lib_default = [NSString pathWithComponents:@[bin_dir, path_lib_default]];
                    }
                    if (!path_lib_default || ![fm isReadableFileAtPath:path_lib_default]) {
                        // Link to the resource could not be resolved.
                        path_lib_default = nil;
                    } else {
                        GGML_LOG_INFO("%s: symlink resolved '%s'\n", __func__, [path_lib_default UTF8String]);
                    }
                }
            } else {
                // The resource couldn't be found in the binary's directory.
                path_lib_default = nil;
            }

            path_lib = path_lib_default;
        }

        if (path_lib != nil) {
            // pre-compiled library found
            NSURL * libURL = [NSURL fileURLWithPath:path_lib];
            GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);

            // judge success by the returned object - 'error' may still hold a value from the lookups above
            error = nil;
            ctx->mtl_library = [ctx->mtl_device newLibraryWithURL:libURL error:&error];
            if (ctx->mtl_library == nil) {
                GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
            }
        } else {
            GGML_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);

            NSString * path_source;
            NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];

            GGML_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");

            if (path_resource) {
                path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
            } else {
                path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
            }

            if (path_source == nil) {
                GGML_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
                path_source = @"ggml-metal.metal";
            }

            GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);

            error = nil;
            src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
            if (src == nil) {
                GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
            }
        }
#endif

        // compile from source only when there is source to compile: 'src' is nil if the
        // pre-compiled library failed to load or the source could not be read
        if (ctx->mtl_library == nil && src != nil) {
            @autoreleasepool {
                // dictionary of preprocessor macros
                NSMutableDictionary * prep = [NSMutableDictionary dictionary];

                if (ctx->props.use_bfloat) {
                    [prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"];
                }

#if GGML_METAL_EMBED_LIBRARY
                [prep setObject:@"1" forKey:@"GGML_METAL_EMBED_LIBRARY"];
#endif

                MTLCompileOptions * options = [MTLCompileOptions new];
                options.preprocessorMacros = prep;

                //[options setFastMathEnabled:false];

                error = nil;
                ctx->mtl_library = [ctx->mtl_device newLibraryWithSource:src options:options error:&error];
                if (ctx->mtl_library == nil) {
                    GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
                } else if (error != nil) {
                    // the compiler can report warnings through 'error' even though a library was produced
                    GGML_LOG_WARN("%s: warning: %s\n", __func__, [[error description] UTF8String]);
                }

#if !__has_feature(objc_arc)
                [options release];
#endif
            }
        }

        if (ctx->mtl_library == nil) {
            GGML_LOG_ERROR("%s: error: failed to load the Metal library\n", __func__);
        }

#if GGML_METAL_EMBED_LIBRARY
        [src release];
#endif // GGML_METAL_EMBED_LIBRARY

        GGML_LOG_INFO("%s: loaded in %.3f sec\n", __func__, (ggml_time_us() - t_start) / 1e6);
    }

    // --------------------------------------------------

    // print MTL GPU family:
    GGML_LOG_INFO("%s: GPU name: %s\n", __func__, ctx->props.name);

    // determine max supported GPU family
    // https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
    // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
    {
        // each loop probes downwards from a value above the known range and reports the first (highest) match
        for (int i = MTLGPUFamilyApple1 + 20; i >= MTLGPUFamilyApple1; --i) {
            if ([ctx->mtl_device supportsFamily:i]) {
                GGML_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - (int) MTLGPUFamilyApple1 + 1, i);
                break;
            }
        }

        for (int i = MTLGPUFamilyCommon1 + 5; i >= MTLGPUFamilyCommon1; --i) {
            if ([ctx->mtl_device supportsFamily:i]) {
                GGML_LOG_INFO("%s: GPU family: MTLGPUFamilyCommon%d (%d)\n", __func__, i - (int) MTLGPUFamilyCommon1 + 1, i);
                break;
            }
        }

        for (int i = MTLGPUFamilyMetal3_GGML + 5; i >= MTLGPUFamilyMetal3_GGML; --i) {
            if ([ctx->mtl_device supportsFamily:i]) {
                GGML_LOG_INFO("%s: GPU family: MTLGPUFamilyMetal%d (%d)\n", __func__, i - (int) MTLGPUFamilyMetal3_GGML + 3, i);
                break;
            }
        }
    }

    GGML_LOG_INFO("%s: simdgroup reduction = %s\n", __func__, ctx->props.has_simdgroup_reduction ? "true" : "false");
    GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx->props.has_simdgroup_mm ? "true" : "false");
    GGML_LOG_INFO("%s: has unified memory = %s\n", __func__, ctx->props.has_unified_memory ? "true" : "false");
    GGML_LOG_INFO("%s: has bfloat = %s\n", __func__, ctx->props.has_bfloat ? "true" : "false");
    GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, ctx->props.use_bfloat ? "true" : "false");
    GGML_LOG_INFO("%s: use residency sets = %s\n", __func__, ctx->props.use_residency_sets ? "true" : "false");
    GGML_LOG_INFO("%s: use shared buffers = %s\n", __func__, ctx->props.use_shared_buffers ? "true" : "false");

#if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
    if (@available(macOS 10.12, iOS 16.0, *)) {
        GGML_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->props.max_working_set_size / 1e6);
    }
#endif

    return ctx;
}
255+
256+
// Destroy a device context.
// Drops the context's references to the library, the queue and the device (in that order),
// then frees the context memory. ctx must not be NULL.
void ggml_backend_metal_device_free(ggml_backend_metal_device_t ctx) {
    assert(ctx != NULL);

    // sending a message to nil is a no-op, so handles that were never created need no explicit check
    [ctx->mtl_library release];
    [ctx->mtl_queue   release];
    [ctx->mtl_device  release];

    free(ctx);
}
276+
277+
// Return the context's MTLDevice as an untyped pointer (nil if no device was created).
// The object is not retained on behalf of the caller.
void * ggml_backend_metal_device_get_device(ggml_backend_metal_device_t ctx) {
    id<MTLDevice> device = ctx->mtl_device;

    return device;
}
280+
281+
// Return the context's MTLLibrary as an untyped pointer (nil if the library was not loaded).
// The object is not retained on behalf of the caller.
void * ggml_backend_metal_device_get_library(ggml_backend_metal_device_t ctx) {
    id<MTLLibrary> library = ctx->mtl_library;

    return library;
}
284+
285+
// Return the context's MTLCommandQueue as an untyped pointer (nil if the queue was not created).
// The object is not retained on behalf of the caller.
void * ggml_backend_metal_device_get_queue(ggml_backend_metal_device_t ctx) {
    id<MTLCommandQueue> queue = ctx->mtl_queue;

    return queue;
}
288+
289+
// Report the device memory budget in bytes.
//
//   total - the device's recommendedMaxWorkingSetSize
//   free  - total minus the device's currentAllocatedSize, clamped at 0
//
// Both outputs are set to 0 when the OS availability check fails.
void ggml_backend_metal_device_get_memory(ggml_backend_metal_device_t ctx, size_t * free, size_t * total) {
    if (@available(macOS 10.12, iOS 16.0, *)) {
        const size_t size_total = ctx->mtl_device.recommendedMaxWorkingSetSize;
        const size_t size_used  = ctx->mtl_device.currentAllocatedSize;

        *total = size_total;

        // the working set size is only a recommendation, so the allocated size may exceed it -
        // clamp instead of letting the unsigned subtraction wrap around to a huge value
        *free = size_used < size_total ? size_total - size_used : 0;
    } else {
        *free  = 0;
        *total = 0;
    }
}
298+
299+
// Return a by-value copy of the properties stored in the context.
struct ggml_backend_metal_device_props ggml_backend_metal_device_get_props(ggml_backend_metal_device_t ctx) {
    const struct ggml_backend_metal_device_props props = ctx->props;

    return props;
}

0 commit comments

Comments
 (0)