Skip to content

Commit 17dd28c

Browse files
authored
Merge pull request #5 from kpouget/remoting
Remoting: keep working
2 parents e397396 + f29aa56 commit 17dd28c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+3206
-1177
lines changed

build.backend.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ rm -f READY_backend FAILED_backend
44
echo "int isatty(int fd) { return 1; }" | gcc -O2 -fpic -shared -ldl -o /tmp/isatty.so -xc -
55
export LD_PRELOAD=/tmp/isatty.so
66

7-
cmake --build ../build.remoting-backend --parallel 8 --target llama-cli "$@"
7+
cmake --build ../build.remoting-backend --parallel 8 --target llama-run "$@"
88

99
if [[ $? == 0 ]]; then
1010
touch READY_backend

ggml/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,7 @@ set(GGML_PUBLIC_HEADERS
271271
include/ggml-rpc.h
272272
include/ggml-sycl.h
273273
include/ggml-vulkan.h
274-
ggml/include/ggml-remoting-frontend.h
275-
ggml/include/ggml-remoting-backend.h
274+
include/ggml-remoting-frontend.h
276275
include/gguf.h)
277276

278277
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")

ggml/include/ggml-remoting-backend.h

Lines changed: 0 additions & 16 deletions
This file was deleted.

ggml/src/ggml-backend-reg.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,6 @@
4949
#include "ggml-remoting-frontend.h"
5050
#endif
5151

52-
#ifdef GGML_USE_REMOTINGBACKEND
53-
#include "ggml-remoting-backend.h"
54-
#endif
55-
5652
#ifdef GGML_USE_OPENCL
5753
#include "ggml-opencl.h"
5854
#endif
@@ -183,9 +179,7 @@ struct ggml_backend_registry {
183179
#ifdef GGML_USE_REMOTINGFRONTEND
184180
register_backend(ggml_backend_remoting_frontend_reg());
185181
#endif
186-
#ifdef GGML_USE_REMOTINGBACKEND
187-
register_backend(ggml_backend_remoting_backend_reg());
188-
#endif
182+
189183
#ifdef GGML_USE_OPENCL
190184
register_backend(ggml_backend_opencl_reg());
191185
#endif

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88

99
#import <Metal/Metal.h>
1010

11+
#undef GGML_LOG_DEBUG
12+
#define GGML_LOG_DEBUG(...)
13+
1114
#undef MIN
1215
#undef MAX
1316
#define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -776,8 +779,6 @@ @implementation GGMLMetalClass
776779
GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
777780
return NULL; \
778781
} \
779-
} else { \
780-
GGML_LOG_WARN("%s: skipping %-40s (not supported)\n", __func__, "kernel_"#name); \
781782
}
782783

783784
const bool has_simdgroup_mm = ctx_dev->has_simdgroup_mm;

ggml/src/ggml-remotingbackend/CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,16 @@ message(STATUS "Enable API Remoting backend")
55

66
ggml_add_backend_library(ggml-remotingbackend
77
backend.cpp
8-
../../include/ggml-remoting-backend.h
8+
backend-dispatched.cpp
9+
backend-dispatched-backend.cpp
10+
backend-dispatched-device.cpp
11+
backend-dispatched-buffer.cpp
12+
backend-dispatched-buffer-type.cpp
13+
backend-utils.cpp
14+
shared/api_remoting.h
15+
shared/apir_backend.h
16+
shared/venus_cs.h
17+
venus_cs_ggml-rpc-back.cpp
918
)
1019

1120
target_compile_options(ggml-remotingbackend PRIVATE -std=c++20)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#include "shared/apir_backend.h"
2+
3+
// Converts a backend buffer into the handle type exchanged over the
// remoting protocol.
static inline apir_buffer_handle_t
ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
  // On the backend side, the handle is simply the buffer pointer value.
  apir_buffer_handle_t handle = (apir_buffer_handle_t) buffer;
  return handle;
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include <cstdint>
2+
#include "backend-internal.h"
3+
#include "backend-dispatched.h"
4+
5+
#include "ggml-impl.h"
6+
#include "ggml-backend-impl.h"
7+
#include "ggml-backend.h"
8+
9+
uint32_t
10+
backend_graph_compute(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
11+
UNUSED(ctx);
12+
UNUSED(enc);
13+
14+
uint32_t shmem_res_id;
15+
vn_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
16+
17+
const void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id);
18+
if (!shmem_data) {
19+
FATAL("Couldn't get the shmem addr from virgl :/");
20+
}
21+
size_t cgraph_size;
22+
vn_decode_size_t(dec, &cgraph_size);
23+
24+
struct vn_cs_decoder secondary_dec = vn_cs_new_decoder((const char *) shmem_data, cgraph_size);
25+
26+
ggml_cgraph *cgraph = vn_decode_ggml_cgraph(&secondary_dec, cgraph_size);
27+
28+
ggml_status status;
29+
status = bck->iface.graph_compute(bck, cgraph);
30+
31+
vn_encode_ggml_status(enc, &status);
32+
33+
return 0;
34+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#include <cstdint>
2+
#include "backend-internal.h"
3+
#include "backend-dispatched.h"
4+
5+
#include "ggml-impl.h"
6+
#include "ggml-backend-impl.h"
7+
#include "ggml-backend.h"
8+
9+
uint32_t
10+
backend_buffer_type_get_name(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
11+
UNUSED(ctx);
12+
ggml_backend_buffer_type_t buft;
13+
buft = vn_decode_ggml_buft(dec);
14+
15+
const char *string = buft->iface.get_name(buft);
16+
17+
const size_t string_size = strlen(string) + 1;
18+
vn_encode_array_size(enc, string_size);
19+
vn_encode_char_array(enc, string, string_size);
20+
21+
return 0;
22+
}
23+
24+
uint32_t
25+
backend_buffer_type_get_alignment(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
26+
UNUSED(ctx);
27+
ggml_backend_buffer_type_t buft;
28+
buft = vn_decode_ggml_buft(dec);
29+
30+
size_t value = buft->iface.get_alignment(buft);
31+
vn_encode_size_t(enc, &value);
32+
33+
return 0;
34+
}
35+
36+
uint32_t
37+
backend_buffer_type_get_max_size(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
38+
UNUSED(ctx);
39+
ggml_backend_buffer_type_t buft;
40+
buft = vn_decode_ggml_buft(dec);
41+
42+
size_t value = buft->iface.get_max_size(buft);
43+
vn_encode_size_t(enc, &value);
44+
45+
return 0;
46+
}
47+
48+
uint32_t
49+
backend_buffer_type_is_host(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
50+
UNUSED(ctx);
51+
ggml_backend_buffer_type_t buft;
52+
buft = vn_decode_ggml_buft(dec);
53+
54+
bool is_host = buft->iface.is_host(buft);
55+
vn_encode_bool_t(enc, &is_host);
56+
57+
return 0;
58+
}
59+
60+
uint32_t
61+
backend_buffer_type_alloc_buffer(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
62+
UNUSED(ctx);
63+
ggml_backend_buffer_type_t buft;
64+
buft = vn_decode_ggml_buft(dec);
65+
66+
size_t size;
67+
vn_decode_size_t(dec, &size);
68+
69+
ggml_backend_buffer_t buffer = buft->iface.alloc_buffer(buft, size);
70+
apir_buffer_handle_t *buffer_handle = (apir_buffer_handle_t *) buffer;
71+
vn_encode_ggml_buffer_handle(enc, buffer_handle);
72+
73+
if (buffer) {
74+
track_backend_buffer(buffer);
75+
}
76+
77+
return 0;
78+
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#include <cstdint>
2+
#include "backend-internal.h"
3+
#include "backend-dispatched.h"
4+
5+
#include "ggml-impl.h"
6+
#include "ggml-backend-impl.h"
7+
#include "ggml-backend.h"
8+
9+
uint32_t
10+
backend_buffer_get_base(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
11+
UNUSED(ctx);
12+
ggml_backend_buffer_t buffer;
13+
buffer = vn_decode_ggml_buffer(dec);
14+
15+
uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer);
16+
vn_encode_uintptr_t(enc, &base);
17+
18+
//INFO("%s: send base %p\n", __func__, (void *) base);
19+
20+
return 0;
21+
}
22+
23+
uint32_t
24+
backend_buffer_set_tensor(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
25+
UNUSED(ctx);
26+
UNUSED(enc);
27+
28+
ggml_backend_buffer_t buffer;
29+
buffer = vn_decode_ggml_buffer(dec);
30+
31+
ggml_tensor *tensor;
32+
// safe to remove the const qualifier here
33+
tensor = (ggml_tensor *) (uintptr_t) vn_decode_ggml_tensor(dec);
34+
35+
uint32_t shmem_res_id;
36+
vn_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
37+
38+
size_t offset;
39+
vn_decode_size_t(dec, &offset);
40+
41+
size_t size;
42+
vn_decode_size_t(dec, &size);
43+
44+
void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id);
45+
46+
if (!shmem_data) {
47+
FATAL("Couldn't get the shmem addr from virgl :/");
48+
}
49+
50+
#if 0
51+
INFO("Calling (%p)->set_tensor(tensor=%p, data=%p, offset=%lu, size=%lu",
52+
buffer, tensor, shmem_data, offset, size);
53+
#endif
54+
#if 0
55+
void **addr = (void **)(uintptr_t) shmem_data;
56+
for (int i = 0; i <= 10; i++) {
57+
INFO("%s: %p | %llx", __func__, addr, *addr);
58+
addr++;
59+
}
60+
INFO("\n");
61+
#endif
62+
63+
buffer->iface.set_tensor(buffer, tensor, shmem_data, offset, size);
64+
65+
return 0;
66+
}
67+
68+
uint32_t
69+
backend_buffer_get_tensor(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
70+
UNUSED(ctx);
71+
UNUSED(enc);
72+
73+
ggml_backend_buffer_t buffer;
74+
buffer = vn_decode_ggml_buffer(dec);
75+
76+
77+
const ggml_tensor *tensor;
78+
// safe to remove the const qualifier here
79+
tensor = vn_decode_ggml_tensor(dec);
80+
81+
uint32_t shmem_res_id;
82+
vn_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
83+
84+
size_t offset;
85+
vn_decode_size_t(dec, &offset);
86+
87+
size_t size;
88+
vn_decode_size_t(dec, &size);
89+
90+
void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id);
91+
if (!shmem_data) {
92+
FATAL("Couldn't get the shmem addr from virgl :/");
93+
}
94+
95+
UNUSED(buffer);
96+
UNUSED(tensor);
97+
buffer->iface.get_tensor(buffer, tensor, shmem_data, offset, size);
98+
99+
return 0;
100+
}
101+
102+
uint32_t
103+
backend_buffer_clear(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
104+
UNUSED(ctx);
105+
UNUSED(enc);
106+
107+
ggml_backend_buffer_t buffer;
108+
buffer = vn_decode_ggml_buffer(dec);
109+
110+
uint8_t value;
111+
vn_decode_uint8_t(dec, &value);
112+
113+
buffer->iface.clear(buffer, value);
114+
115+
return 0;
116+
}
117+
118+
uint32_t
119+
backend_buffer_free_buffer(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
120+
UNUSED(ctx);
121+
UNUSED(enc);
122+
123+
ggml_backend_buffer_t buffer;
124+
buffer = vn_decode_ggml_buffer(dec);
125+
126+
buffer->iface.free_buffer(buffer);
127+
128+
return 0;
129+
}

0 commit comments

Comments
 (0)