Skip to content

Commit 326f6f3

Browse files
committed
not sure if working on metal
2 parents 7e9db9d + 0320ac5 commit 326f6f3

File tree

302 files changed

+33417
-18635
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

302 files changed

+33417
-18635
lines changed

.editorconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,11 @@ insert_final_newline = unset
5252
[vendor/miniaudio/miniaudio.h]
5353
trim_trailing_whitespace = unset
5454
insert_final_newline = unset
55+
56+
[tools/server/webui/**]
57+
indent_style = unset
58+
indent_size = unset
59+
end_of_line = unset
60+
charset = unset
61+
trim_trailing_whitespace = unset
62+
insert_final_newline = unset
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
trigger: manual
3+
---
4+
5+
#### Tailwind & CSS
6+
7+
- We are using Tailwind v4, which uses oklch colors, so we now want to refer to the CSS vars directly, without wrapping them in any color function such as `hsl`/`hsla`, `rgba`, etc.
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
---
2+
trigger: manual
3+
---
4+
5+
# Coding rules
6+
7+
## Svelte & SvelteKit
8+
9+
### Services vs Stores Separation Pattern
10+
11+
#### `lib/services/` - Pure Business Logic
12+
13+
- **Purpose**: Stateless business logic and external communication
14+
- **Contains**:
15+
- API calls to external services (ApiService)
16+
- Pure business logic functions (ChatService, etc.)
17+
- **Rules**:
18+
- NO Svelte runes ($state, $derived, $effect)
19+
- NO reactive state management
20+
- Pure functions and classes only
21+
- Can import types but not stores
22+
- Focus on "how" - implementation details
23+
24+
#### `lib/stores/` - Reactive State Management
25+
26+
- **Purpose**: Svelte-specific reactive state with runes
27+
- **Contains**:
28+
- Reactive state classes with $state, $derived, $effect
29+
- Database operations (DatabaseStore)
30+
- UI-focused state management
31+
- Store orchestration logic
32+
- **Rules**:
33+
- USE Svelte runes for reactivity
34+
- Import and use services for business logic
35+
- NO direct database operations
36+
- NO direct API calls (use services)
37+
- Focus on "what" - reactive state for UI
38+
39+
#### Enforcement
40+
41+
- Services should be testable without Svelte
42+
- Stores should leverage Svelte's reactivity system
43+
- Clear separation: services handle data, stores handle state
44+
- Services can be reused across multiple stores
45+
46+
#### Misc
47+
48+
- Always use `let` for $derived state variables

.windsurf/rules/tests.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
trigger: manual
3+
---
4+
5+
# Automated Tests
6+
7+
## General rules
8+
9+
- NEVER include any test code in the production code - it should always live in separate, dedicated files
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
trigger: manual
3+
---
4+
5+
## TypeScript
6+
7+
- Add JSDocs for functions

Makefile

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,17 +318,29 @@ ifdef LLAMA_METAL
318318
CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG -DSD_USE_METAL
319319
CXXFLAGS += -DGGML_USE_METAL -DSD_USE_METAL
320320
LDFLAGS += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
321-
OBJS += ggml-metal.o ggml-metal-common.o
321+
OBJS += ggml-metal.o ggml-metal-device.o ggml-metal-device-m.o ggml-metal-context-m.o ggml-metal-common.o ggml-metal-ops.o
322322

323323
ggml-metal-common.o: ggml/src/ggml-metal/ggml-metal-common.cpp ggml/src/ggml-metal/ggml-metal-common.h
324324
$(CXX) $(CXXFLAGS) -c $< -o $@
325325

326-
ggml-metal.o: ggml/src/ggml-metal/ggml-metal.m ggml/src/ggml-metal/ggml-metal-impl.h ggml/include/ggml-metal.h
326+
ggml-metal-ops.o: ggml/src/ggml-metal/ggml-metal-ops.cpp ggml/src/ggml-metal/ggml-metal-ops.h
327+
$(CXX) $(CXXFLAGS) -c $< -o $@
328+
329+
ggml-metal.o: ggml/src/ggml-metal/ggml-metal.cpp
330+
$(CXX) $(CXXFLAGS) -c $< -o $@
331+
332+
ggml-metal-device.o: ggml/src/ggml-metal/ggml-metal-device.cpp
333+
$(CXX) $(CXXFLAGS) -c $< -o $@
334+
335+
ggml-metal-device-m.o: ggml/src/ggml-metal/ggml-metal-device.m ggml/src/ggml-metal/ggml-metal-impl.h ggml/include/ggml-metal.h
327336
@echo "== Preparing merged Metal file =="
328337
@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
329338
@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-merged.metal
330339
@cp ggml/src/ggml-metal/ggml-metal-merged.metal ./ggml-metal-merged.metal
331340
$(CC) $(CFLAGS) -c $< -o $@
341+
342+
ggml-metal-context-m.o: ggml/src/ggml-metal/ggml-metal-context.m ggml/src/ggml-metal/ggml-metal-impl.h ggml/include/ggml-metal.h
343+
$(CC) $(CFLAGS) -c $< -o $@
332344
endif # LLAMA_METAL
333345

334346
ifneq ($(filter aarch64%,$(UNAME_M)),)

ggml/include/ggml-metal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ extern "C" {
3939
// user-code should use only these functions
4040
//
4141

42+
// TODO: remove in the future
4243
GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
4344

4445
GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);

ggml/include/ggml.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -290,19 +290,19 @@ __host__ __device__ constexpr inline void ggml_unused_vars_impl(Args&&...) noexc
290290
// GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
291291
//
292292
#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
293-
const type prefix##0 = (pointer)->array[0]; \
293+
const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \
294294
GGML_UNUSED(prefix##0);
295295
#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
296296
GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
297-
const type prefix##1 = (pointer)->array[1]; \
297+
const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \
298298
GGML_UNUSED(prefix##1);
299299
#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
300300
GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
301-
const type prefix##2 = (pointer)->array[2]; \
301+
const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \
302302
GGML_UNUSED(prefix##2);
303303
#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
304304
GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
305-
const type prefix##3 = (pointer)->array[3]; \
305+
const type prefix##3 = (pointer) ? (pointer)->array[3] : 0; \
306306
GGML_UNUSED(prefix##3);
307307

308308
#define GGML_TENSOR_UNARY_OP_LOCALS \

ggml/src/ggml-metal/ggml-metal-common.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ struct ggml_mem_ranges {
2222
int debug = 0;
2323
};
2424

25-
struct ggml_mem_ranges * ggml_mem_ranges_init(int debug) {
25+
ggml_mem_ranges_t ggml_mem_ranges_init(int debug) {
2626
auto * res = new ggml_mem_ranges;
2727

2828
res->ranges.reserve(256);
@@ -31,15 +31,15 @@ struct ggml_mem_ranges * ggml_mem_ranges_init(int debug) {
3131
return res;
3232
}
3333

34-
void ggml_mem_ranges_free(ggml_mem_ranges * mrs) {
34+
void ggml_mem_ranges_free(ggml_mem_ranges_t mrs) {
3535
delete mrs;
3636
}
3737

38-
void ggml_mem_ranges_reset(ggml_mem_ranges * mrs) {
38+
void ggml_mem_ranges_reset(ggml_mem_ranges_t mrs) {
3939
mrs->ranges.clear();
4040
}
4141

42-
static bool ggml_mem_ranges_add(ggml_mem_ranges * mrs, ggml_mem_range mr) {
42+
static bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, ggml_mem_range mr) {
4343
mrs->ranges.push_back(mr);
4444

4545
return true;
@@ -87,7 +87,7 @@ static ggml_mem_range ggml_mem_range_from_tensor_dst(const ggml_tensor * tensor)
8787
return ggml_mem_range_from_tensor(tensor, MEM_RANGE_TYPE_DST);
8888
}
8989

90-
static bool ggml_mem_ranges_add_src(ggml_mem_ranges * mrs, const ggml_tensor * tensor) {
90+
static bool ggml_mem_ranges_add_src(ggml_mem_ranges_t mrs, const ggml_tensor * tensor) {
9191
GGML_ASSERT(tensor);
9292

9393
ggml_mem_range mr = ggml_mem_range_from_tensor_src(tensor);
@@ -99,7 +99,7 @@ static bool ggml_mem_ranges_add_src(ggml_mem_ranges * mrs, const ggml_tensor * t
9999
return ggml_mem_ranges_add(mrs, mr);
100100
}
101101

102-
static bool ggml_mem_ranges_add_dst(ggml_mem_ranges * mrs, const ggml_tensor * tensor) {
102+
static bool ggml_mem_ranges_add_dst(ggml_mem_ranges_t mrs, const ggml_tensor * tensor) {
103103
GGML_ASSERT(tensor);
104104

105105
ggml_mem_range mr = ggml_mem_range_from_tensor_dst(tensor);
@@ -111,7 +111,7 @@ static bool ggml_mem_ranges_add_dst(ggml_mem_ranges * mrs, const ggml_tensor * t
111111
return ggml_mem_ranges_add(mrs, mr);
112112
}
113113

114-
bool ggml_mem_ranges_add(ggml_mem_ranges * mrs, const ggml_tensor * tensor) {
114+
bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, const ggml_tensor * tensor) {
115115
for (int i = 0; i < GGML_MAX_DIMS; i++) {
116116
if (tensor->src[i]) {
117117
ggml_mem_ranges_add_src(mrs, tensor->src[i]);
@@ -121,7 +121,7 @@ bool ggml_mem_ranges_add(ggml_mem_ranges * mrs, const ggml_tensor * tensor) {
121121
return ggml_mem_ranges_add_dst(mrs, tensor);
122122
}
123123

124-
static bool ggml_mem_ranges_check(const ggml_mem_ranges * mrs, ggml_mem_range mr) {
124+
static bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, ggml_mem_range mr) {
125125
for (size_t i = 0; i < mrs->ranges.size(); i++) {
126126
const auto & cmp = mrs->ranges[i];
127127

@@ -152,7 +152,7 @@ static bool ggml_mem_ranges_check(const ggml_mem_ranges * mrs, ggml_mem_range mr
152152
return true;
153153
}
154154

155-
static bool ggml_mem_ranges_check_src(const ggml_mem_ranges * mrs, const ggml_tensor * tensor) {
155+
static bool ggml_mem_ranges_check_src(ggml_mem_ranges_t mrs, const ggml_tensor * tensor) {
156156
GGML_ASSERT(tensor);
157157

158158
ggml_mem_range mr = ggml_mem_range_from_tensor_src(tensor);
@@ -162,7 +162,7 @@ static bool ggml_mem_ranges_check_src(const ggml_mem_ranges * mrs, const ggml_te
162162
return res;
163163
}
164164

165-
static bool ggml_mem_ranges_check_dst(const ggml_mem_ranges * mrs, const ggml_tensor * tensor) {
165+
static bool ggml_mem_ranges_check_dst(ggml_mem_ranges_t mrs, const ggml_tensor * tensor) {
166166
GGML_ASSERT(tensor);
167167

168168
ggml_mem_range mr = ggml_mem_range_from_tensor_dst(tensor);
@@ -172,7 +172,7 @@ static bool ggml_mem_ranges_check_dst(const ggml_mem_ranges * mrs, const ggml_te
172172
return res;
173173
}
174174

175-
bool ggml_mem_ranges_check(const ggml_mem_ranges * mrs, const ggml_tensor * tensor) {
175+
bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, const ggml_tensor * tensor) {
176176
for (int i = 0; i < GGML_MAX_DIMS; i++) {
177177
if (tensor->src[i]) {
178178
if (!ggml_mem_ranges_check_src(mrs, tensor->src[i])) {
@@ -222,7 +222,7 @@ struct node_info {
222222

223223
static std::vector<int> ggml_metal_graph_optimize_reorder(const std::vector<node_info> & nodes) {
224224
// helper to add node src and dst ranges
225-
const auto & h_add = [](ggml_mem_ranges * mrs, const node_info & node) {
225+
const auto & h_add = [](ggml_mem_ranges_t mrs, const node_info & node) {
226226
for (int i = 0; i < GGML_MAX_SRC; i++) {
227227
if (node.node->src[i]) {
228228
if (!ggml_mem_ranges_add_src(mrs, node.node->src[i])) {
@@ -246,7 +246,7 @@ static std::vector<int> ggml_metal_graph_optimize_reorder(const std::vector<node
246246
};
247247

248248
// helper to check if a node can run concurrently with the existing set of nodes
249-
const auto & h_check = [](const ggml_mem_ranges * mrs, const node_info & node) {
249+
const auto & h_check = [](ggml_mem_ranges_t mrs, const node_info & node) {
250250
for (int i = 0; i < GGML_MAX_SRC; i++) {
251251
if (node.node->src[i]) {
252252
if (!ggml_mem_ranges_check_src(mrs, node.node->src[i])) {
@@ -301,10 +301,10 @@ static std::vector<int> ggml_metal_graph_optimize_reorder(const std::vector<node
301301
std::vector<bool> used(n, false);
302302

303303
// the memory ranges for the set of currently concurrent nodes
304-
ggml_mem_ranges * mrs0 = ggml_mem_ranges_init(0);
304+
ggml_mem_ranges_t mrs0 = ggml_mem_ranges_init(0);
305305

306306
// the memory ranges for the set of nodes that haven't been processed yet, when looking forward for a node to reorder
307-
ggml_mem_ranges * mrs1 = ggml_mem_ranges_init(0);
307+
ggml_mem_ranges_t mrs1 = ggml_mem_ranges_init(0);
308308

309309
for (int i0 = 0; i0 < n; i0++) {
310310
if (used[i0]) {
@@ -375,7 +375,7 @@ static std::vector<int> ggml_metal_graph_optimize_reorder(const std::vector<node
375375
return res;
376376
}
377377

378-
void ggml_metal_graph_optimize(ggml_cgraph * gf) {
378+
void ggml_graph_optimize(ggml_cgraph * gf) {
379379
constexpr int MAX_FUSE = 16;
380380

381381
const int n = gf->n_nodes;

ggml/src/ggml-metal/ggml-metal-common.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,27 +25,27 @@ enum ggml_mem_range_type {
2525
// can be added to the set without violating the constraints (i.e. if it can be executed concurrently with the
2626
// tasks already in the set)
2727
//
28-
struct ggml_mem_ranges;
28+
typedef struct ggml_mem_ranges * ggml_mem_ranges_t;
2929

30-
struct ggml_mem_ranges * ggml_mem_ranges_init(int debug);
31-
void ggml_mem_ranges_free(struct ggml_mem_ranges * mrs);
30+
ggml_mem_ranges_t ggml_mem_ranges_init(int debug);
31+
void ggml_mem_ranges_free(ggml_mem_ranges_t mrs);
3232

3333
// remove all ranges from the set
34-
void ggml_mem_ranges_reset(struct ggml_mem_ranges * mrs);
34+
void ggml_mem_ranges_reset(ggml_mem_ranges_t mrs);
3535

3636
// add src or dst ranges to track
37-
bool ggml_mem_ranges_add(struct ggml_mem_ranges * mrs, const struct ggml_tensor * tensor);
37+
bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);
3838

3939
// return false if:
4040
// - new src range overlaps with any existing dst range
4141
// - new dst range overlaps with any existing range (src or dst)
42-
bool ggml_mem_ranges_check(const struct ggml_mem_ranges * mrs, const struct ggml_tensor * tensor);
42+
bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);
4343

4444
// reorder the nodes in the graph to improve concurrency, while respecting fusion
4545
//
4646
// note: this implementation is generic and not specific to metal
4747
// if it proves to work well, we can start using it for other backends in the future
48-
void ggml_metal_graph_optimize(struct ggml_cgraph * gf);
48+
void ggml_graph_optimize(struct ggml_cgraph * gf);
4949

5050
#ifdef __cplusplus
5151
}

0 commit comments

Comments
 (0)