Skip to content

Commit 4921388

Browse files
authored
Merge pull request #13 from kpouget/supports_op
Reintroduce the support for supports_op
2 parents af7caf0 + 34e68b5 commit 4921388

File tree

8 files changed

+104
-12
lines changed

8 files changed

+104
-12
lines changed

ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,21 @@ backend_graph_compute(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, stru
3232
ggml_cgraph *cgraph = vn_decode_ggml_cgraph(&secondary_dec, cgraph_size);
3333

3434
ggml_status status;
35+
#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1
36+
for (int idx = 0; idx < cgraph->n_nodes; idx++) {
37+
ggml_tensor *op = ggml_graph_node(cgraph, idx);
38+
if (dev->iface.supports_op(dev, op)) {
39+
continue;
40+
}
41+
ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op));
42+
43+
status = GGML_STATUS_ABORTED;
44+
vn_encode_ggml_status(enc, &status);
45+
46+
stop_timer(&graph_compute_timer);
47+
return 0;
48+
}
49+
#endif
3550
status = bck->iface.graph_compute(bck, cgraph);
3651

3752
vn_encode_ggml_status(enc, &status);

ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ uint32_t
7373
backend_device_supports_op(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
7474
UNUSED(ctx);
7575

76-
const ggml_tensor *op = vn_decode_ggml_tensor(dec);
76+
const ggml_tensor *op = vn_decode_ggml_tensor_inplace(dec);
7777

7878
bool supports_op = dev->iface.supports_op(dev, op);
7979

ggml/src/ggml-remotingbackend/shared/apir_backend.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99

1010
#define APIR_BACKEND_FORWARD_INDEX_INVALID 6
1111

12+
// 1 is fast, 0 avoids micro-benchmark crashes
13+
#define APIR_DEVICE_SUPPORTS_OP_ALWAYS_TRUE 0
14+
15+
// 0 is fast, 1 prevents the backend from crashing if an unsupported tensor is received
16+
#define APIR_BACKEND_CHECK_SUPPORTS_OP 0
17+
1218
typedef uintptr_t apir_buffer_type_host_handle_t;
1319
typedef uintptr_t apir_buffer_host_handle_t;
1420

ggml/src/ggml-remotingbackend/shared/venus_cs_ggml.h

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,72 @@ vn_decode_ggml_cgraph(struct vn_cs_decoder *dec, size_t cgraph_size) {
165165

166166
return deserialize_graph(n_nodes, n_tensors, tensors, nodes);
167167
}
168+
169+
static inline void
170+
vn_encode_ggml_buffer_handle(struct vn_cs_encoder *enc, const apir_buffer_host_handle_t *handle) {
171+
vn_cs_encoder_write(enc, sizeof(*handle), &handle, sizeof(*handle));
172+
}
173+
174+
static inline void
175+
vn_encode_ggml_tensor_inline(struct vn_cs_encoder *enc, const ggml_tensor *tensor) {
176+
size_t tensor_size = sizeof(*tensor);
177+
178+
if (tensor->extra) {
179+
FATAL("Cannot pass tensors with extra");
180+
}
181+
182+
if (tensor->src[0] && tensor->buffer) {
183+
static int first = 1;
184+
if (first) {
185+
// not sure if the buffer needs to be updated inside the src tensors or not
186+
WARNING("Cannot pass tensors with src and buffer");
187+
first = 0;
188+
}
189+
}
190+
191+
vn_cs_encoder_write(enc, tensor_size, tensor, tensor_size);
192+
193+
// tensor->data is a pointer inside the device buffer. No need to touch it
194+
// tensor->buffer is a pointer to a buffer. Encoding the buffer handle in sequence.
195+
// (could also make a copy of the tensor, and update locally.)
196+
197+
if (tensor->buffer) {
198+
apir_buffer_host_handle_t buffer_handle = ggml_buffer_to_apir_handle(tensor->buffer);
199+
vn_encode_ggml_buffer_handle(enc, &buffer_handle);
200+
}
201+
202+
if (tensor->view_src) {
203+
vn_cs_encoder_write(enc, tensor_size, tensor->view_src, tensor_size);
204+
}
205+
206+
for (int i = 0; tensor->src[i]; i++) {
207+
const ggml_tensor *tensor_src = tensor->src[i];
208+
vn_cs_encoder_write(enc, tensor_size, tensor_src, tensor_size);
209+
}
210+
}
211+
212+
/*
 * Decode a tensor that was serialized inline in the command stream,
 * without copying it: the returned tensor points inside the decoder's data.
 *
 * The pointers stored in the serialized tensor are fixed up in place:
 *   - buffer   is replaced by the buffer decoded next from the stream,
 *   - view_src is replaced by the location of the next tensor in the stream,
 *   - src[i]   is replaced by the location of each following tensor.
 * A field is only fixed up (and its payload only consumed) when the
 * serialized value is non-NULL.
 *
 * NOTE(review): the results of vn_cs_decoder_use_inplace() are used
 * unchecked; confirm that it cannot return NULL on a truncated stream.
 */
static inline const ggml_tensor *
vn_decode_ggml_tensor_inplace(struct vn_cs_decoder *dec) {

  // it is safe to remove the `const` qualifier here, we *do* want to
  // modify the shared memory data to fix the `src` pointers.
  ggml_tensor *tensor = (ggml_tensor *)(uintptr_t) vn_cs_decoder_use_inplace(dec, sizeof(ggml_tensor));

  // Number of slots in tensor->src, derived from the array itself.
  const size_t max_src = sizeof(tensor->src) / sizeof(tensor->src[0]);

  // tensor->data is a pointer inside the device buffer. No need to touch it
  // tensor->buffer is a pointer to a buffer. Decode the buffer handle encoded in sequence.
  if (tensor->buffer) {
    tensor->buffer = vn_decode_ggml_buffer(dec);
  }

  if (tensor->view_src) {
    ggml_tensor *tensor_view_src = (ggml_tensor *)(uintptr_t) vn_cs_decoder_use_inplace(dec, sizeof(ggml_tensor));
    tensor->view_src = tensor_view_src;
  }

  // The src entries come from the command stream: bound the walk so a
  // tensor with every slot set cannot make us run past src[] and overwrite
  // the fields that follow it.
  for (size_t i = 0; i < max_src && tensor->src[i]; i++) {
    ggml_tensor *tensor_src = (ggml_tensor *)(uintptr_t) vn_cs_decoder_use_inplace(dec, sizeof(ggml_tensor));
    tensor->src[i] = tensor_src; // overwrite op->src[i] pointer with the actual location of the src tensor
  }

  return tensor;
}

ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,9 @@ ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, s
3838

3939
static bool
4040
ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
41-
#if 1
42-
UNUSED(dev);
43-
UNUSED(op);
44-
45-
return true; // same as ggml-rpc
46-
#else
4741
struct virtgpu *gpu = DEV_TO_GPU(dev);
4842

4943
return apir_device_supports_op(gpu, op);
50-
#endif
5144
}
5245

5346
static bool

ggml/src/ggml-remotingfrontend/ggml-remoting.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,7 @@ struct remoting_context_struct {
126126
};
127127
typedef std::shared_ptr<remoting_context_struct> remoting_context;
128128
typedef std::weak_ptr<remoting_context_struct> remoting_context_ref;
129+
130+
// Convert `buffer` into its apir_buffer_host_handle_t using BUFFER_TO_HOST_HANDLE().
static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
  const apir_buffer_host_handle_t host_handle = BUFFER_TO_HOST_HANDLE(buffer);

  return host_handle;
}

ggml/src/ggml-remotingfrontend/venus_cs_ggml-rpc-front.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,13 @@ serialize_tensor(const ggml_tensor * tensor) {
4040
result.view_src = reinterpret_cast<uint64_t>(tensor->view_src);
4141
result.view_offs = tensor->view_offs;
4242
result.data = reinterpret_cast<uint64_t>(tensor->data);
43-
// tensor->data is serialized as an offset to the buffer base address
44-
result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
43+
if (tensor->data) {
44+
if (!tensor->buffer) {
45+
FATAL("tensor has data but not buffer :/");
46+
}
47+
// tensor->data is serialized as an offset to the buffer base address
48+
result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
49+
}
4550
snprintf(result.name, GGML_MAX_NAME, "%s", tensor->name);
4651
return result;
4752
}

ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ apir_device_get_memory(struct virtgpu *gpu, size_t *free, size_t *total) {
135135

136136
bool
137137
apir_device_supports_op(struct virtgpu *gpu, const ggml_tensor *op) {
138-
#if 1
138+
#if APIR_DEVICE_SUPPORTS_OP_ALWAYS_TRUE
139139
/* ggml-rpc cheats it like this */
140140
/* with the current implementation of serialize_tensor, the src/view aren't properly passed */
141141
UNUSED(gpu);
@@ -147,7 +147,7 @@ apir_device_supports_op(struct virtgpu *gpu, const ggml_tensor *op) {
147147
struct vn_cs_decoder *decoder;
148148
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP);
149149

150-
vn_encode_ggml_tensor(encoder, op);
150+
vn_encode_ggml_tensor_inline(encoder, op);
151151

152152
REMOTE_CALL(gpu, encoder, decoder);
153153

0 commit comments

Comments
 (0)