Commit a5580a3

fix cuda and macos compile issues
1 parent 6463f5c commit a5580a3

File tree

6 files changed: 6 additions & 31 deletions


CMakeLists.txt

Lines changed: 4 additions & 0 deletions
@@ -79,6 +79,8 @@ file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu")
     list(APPEND GGML_SOURCES_CUDA ${SRCS})
     file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
     list(APPEND GGML_SOURCES_CUDA ${SRCS})
+    file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmf*.cu")
+    list(APPEND GGML_SOURCES_CUDA ${SRCS})
     set(GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
     set(GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
     set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
@@ -176,6 +178,8 @@ if (LLAMA_HIPBLAS)
     list(APPEND GGML_SOURCES_ROCM ${SRCS})
     file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
     list(APPEND GGML_SOURCES_ROCM ${SRCS})
+    file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmf*.cu")
+    list(APPEND GGML_SOURCES_ROCM ${SRCS})
     add_compile_definitions(GGML_USE_HIP GGML_USE_CUDA SD_USE_CUDA GGML_HIP_NO_VMM)
     add_library(ggml-rocm ${GGML_SOURCES_CUDA})

Makefile

Lines changed: 1 addition & 0 deletions
@@ -191,6 +191,7 @@ endif
 # it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
 OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu))
 OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmf*.cu))
 OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
 OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
 OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
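
For context on why leaving a glob out breaks the CUDA build: the template-instances directory holds one explicit instantiation per .cu file, so if those files are never compiled, the declarations used elsewhere have no definitions and the link step fails with undefined symbols. A generic C++ sketch of that split-instantiation pattern (the file names and the kernel are illustrative, not the actual ggml sources):

    // kernels.h -- declaration visible everywhere (illustrative)
    template <typename T> void scale_rows(T * data, int n, T factor);
    extern template void scale_rows<float>(float *, int, float);

    // instance_f32.cpp -- the "template instance" translation unit; if this
    // file is left out of the build, uses of scale_rows<float> fail to link
    template <typename T> void scale_rows(T * data, int n, T factor) {
        for (int i = 0; i < n; ++i) data[i] *= factor;
    }
    template void scale_rows<float>(float *, int, float);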

otherarch/ttscpp/src/dia_model.cpp

Lines changed: 1 addition & 8 deletions
@@ -264,11 +264,6 @@ void dia_context::reset() {
 
 struct dia_context * build_new_dia_context(struct dia_model * model, int n_threads, bool use_cpu) {
     dia_context * dctx = new dia_context(model, n_threads);
-    if (!use_cpu) {
-#ifdef GGML_USE_METAL
-        dctx->backend = ggml_backend_metal_init();
-#endif
-    }
     dctx->backend_cpu = ggml_backend_cpu_init();
     dctx->set_threads();
     dctx->build_schedule();
@@ -280,9 +275,7 @@ static bool dia_kv_cache_init(struct dia_kv_cache * cache, dia_model * model, di
     ggml_backend_buffer_type_t buft = nullptr;
     // this will only really support cpu or metal for the time being;
     if (dctx->backend != nullptr) {
-#ifdef GGML_USE_METAL
-        buft = ggml_backend_metal_buffer_type();
-#endif
+
     } else {
         buft = ggml_backend_cpu_buffer_type();
     }
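
The orpheus and parler diffs below make the same change: with the #ifdef GGML_USE_METAL initialization removed, each context's backend pointer is never set on this path, so the KV-cache helpers always fall through to the CPU branch. A minimal C++ sketch of the selection logic the three files converge on (the helper name is illustrative, not part of the project; the ggml-backend calls are the ones visible in the diff):

    // Sketch, not the project's code: with no Metal init, backend stays
    // nullptr and the KV cache is always allocated on CPU buffer types.
    #include "ggml-backend.h"

    static ggml_backend_buffer_type_t pick_kv_buffer_type(ggml_backend_t backend) {
        if (backend != nullptr) {
            // previously: buft = ggml_backend_metal_buffer_type(); (Metal builds only)
            return nullptr; // kept only to mirror the diff's now-empty branch
        }
        return ggml_backend_cpu_buffer_type();
    }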

otherarch/ttscpp/src/orpheus_model.cpp

Lines changed: 0 additions & 8 deletions
@@ -138,11 +138,6 @@ struct ggml_tensor * build_attn_mask(ggml_context * ctx, orpheus_context * octx,
 
 orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads, bool use_cpu) {
     orpheus_context * octx = new orpheus_context(model, n_threads);
-    if (!use_cpu) {
-#ifdef GGML_USE_METAL
-        octx->backend = ggml_backend_metal_init();
-#endif
-    }
     octx->backend_cpu = ggml_backend_cpu_init();
     octx->set_threads();
     octx->build_schedule();
@@ -153,9 +148,6 @@ orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads
 void orpheus_runner::orpheus_kv_cache_init() {
     ggml_backend_buffer_type_t buft = nullptr;
     if (octx->backend != nullptr) {
-#ifdef GGML_USE_METAL
-        buft = ggml_backend_metal_buffer_type();
-#endif
     } else {
         buft = ggml_backend_cpu_buffer_type();
     }

otherarch/ttscpp/src/parler_model.cpp

Lines changed: 0 additions & 8 deletions
@@ -323,11 +323,6 @@ void parler_context::reset(int32_t n_output_heads) {
 
 struct parler_context * build_new_parler_context(struct parler_tts_model * model, int n_threads, bool use_cpu) {
     parler_context * pctx = new parler_context(model, n_threads);
-    if (!use_cpu) {
-#ifdef GGML_USE_METAL
-        pctx->backend = ggml_backend_metal_init();
-#endif
-    }
     pctx->eos_seen.reserve(model->n_output_heads);
     pctx->backend_cpu = ggml_backend_cpu_init();
     pctx->set_threads();
@@ -343,9 +338,6 @@ static bool parler_kv_cache_init(struct parler_kv_cache * cache, parler_tts_mode
     ggml_backend_buffer_type_t buft = nullptr;
     // this will only really support cpu or metal for the time being;
     if (pctx->backend != nullptr) {
-#ifdef GGML_USE_METAL
-        buft = ggml_backend_metal_buffer_type();
-#endif
     } else {
         buft = ggml_backend_cpu_buffer_type();
     }

otherarch/ttscpp/src/tts_model.cpp

Lines changed: 0 additions & 7 deletions
@@ -50,9 +50,6 @@ void runner_context::set_threads() {
 void runner_context::build_schedule(size_t max_nodes) {
     backend_cpu_buffer = ggml_backend_cpu_buffer_type();
     if (backend != nullptr) {
-#ifdef GGML_USE_METAL
-        backend_buffer = ggml_backend_metal_buffer_type();
-#endif
         std::vector<ggml_backend_buffer_type_t> bufs = {backend_buffer, backend_cpu_buffer};
         std::vector<ggml_backend_t> backs = {backend, backend_cpu};
         sched = ggml_backend_sched_new(backs.data(), bufs.data(), 2, max_nodes, false, false);
@@ -103,10 +100,6 @@ void tts_model::prep_buffers_and_context(bool cpu_only, float size_offset, uint3
         backend = ggml_backend_cpu_init();
         buffer = ggml_backend_cpu_buffer_type();
     } else {
-#ifdef GGML_USE_METAL
-        backend = ggml_backend_metal_init();
-        buffer = ggml_backend_metal_buffer_type();
-#endif
         // if use metal is not installed then we need to warn here
         if (!backend || !buffer) {
             TTS_ABORT("'GGML_USE_METAL' is not defined either set the model to use CPU only or install ggml with metal support.");
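
Taken together, the two hunks above mean the non-CPU path in tts_model.cpp no longer initializes any backend. A sketch of the resulting control flow in prep_buffers_and_context (paraphrased from the diff, not a verbatim copy; it assumes backend and buffer are not assigned anywhere else first):

    // Post-commit flow: only the CPU branch can succeed now.
    if (cpu_only) {
        backend = ggml_backend_cpu_init();
        buffer  = ggml_backend_cpu_buffer_type();
    } else {
        // the #ifdef GGML_USE_METAL block was removed, so backend and
        // buffer stay null here and the existing guard below aborts
        if (!backend || !buffer) {
            TTS_ABORT("'GGML_USE_METAL' is not defined either set the model to use CPU only or install ggml with metal support.");
        }
    }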
