@@ -165,3 +165,72 @@ vn_decode_ggml_cgraph(struct vn_cs_decoder *dec, size_t cgraph_size) {
165165
166166 return deserialize_graph (n_nodes, n_tensors, tensors, nodes);
167167}
168+
169+ static inline void
170+ vn_encode_ggml_buffer_handle (struct vn_cs_encoder *enc, const apir_buffer_host_handle_t *handle) {
171+ vn_cs_encoder_write (enc, sizeof (*handle), &handle, sizeof (*handle));
172+ }
173+
174+ static inline void
175+ vn_encode_ggml_tensor_inline (struct vn_cs_encoder *enc, const ggml_tensor *tensor) {
176+ size_t tensor_size = sizeof (*tensor);
177+
178+ if (tensor->extra ) {
179+ FATAL (" Cannot pass tensors with extra" );
180+ }
181+
182+ if (tensor->src [0 ] && tensor->buffer ) {
183+ static int first = 1 ;
184+ if (first) {
185+ // not sure if the buffer needs to be updated inside the src tensors or not
186+ WARNING (" Cannot pass tensors with src and buffer" );
187+ first = 0 ;
188+ }
189+ }
190+
191+ vn_cs_encoder_write (enc, tensor_size, tensor, tensor_size);
192+
193+ // tensor->data is a pointer inside the device buffer. No need to touch it
194+ // tensor->buffer is a pointer to a buffer. Encoding the buffer handle in sequence.
195+ // (could also make a copy of the tensor, and update locally.)
196+
197+ if (tensor->buffer ) {
198+ apir_buffer_host_handle_t buffer_handle = ggml_buffer_to_apir_handle (tensor->buffer );
199+ vn_encode_ggml_buffer_handle (enc, &buffer_handle);
200+ }
201+
202+ if (tensor->view_src ) {
203+ vn_cs_encoder_write (enc, tensor_size, tensor->view_src , tensor_size);
204+ }
205+
206+ for (int i = 0 ; tensor->src [i]; i++) {
207+ const ggml_tensor *tensor_src = tensor->src [i];
208+ vn_cs_encoder_write (enc, tensor_size, tensor_src, tensor_size);
209+ }
210+ }
211+
212+ static inline const ggml_tensor *
213+ vn_decode_ggml_tensor_inplace (struct vn_cs_decoder *dec) {
214+
215+ // it safe to remove the `const` qualifier here, we *do* want to
216+ // modify the shared memory data to fix the `src` pointers.
217+ ggml_tensor *tensor = (ggml_tensor *)(uintptr_t ) vn_cs_decoder_use_inplace (dec, sizeof (ggml_tensor));
218+
219+ // tensor->data is a pointer inside the device buffer. No need to touch it
220+ // tensor->buffer is a pointer to a buffer. Decode the buffer handle encoded in sequence.
221+ if (tensor->buffer ) {
222+ tensor->buffer = vn_decode_ggml_buffer (dec);
223+ }
224+
225+ if (tensor->view_src ) {
226+ ggml_tensor *tensor_view_src = (ggml_tensor *)(uintptr_t ) vn_cs_decoder_use_inplace (dec, sizeof (ggml_tensor));
227+ tensor->view_src = tensor_view_src;
228+ }
229+
230+ for (int i = 0 ; tensor->src [i]; i++) {
231+ ggml_tensor *tensor_src = (ggml_tensor *)(uintptr_t ) vn_cs_decoder_use_inplace (dec, sizeof (ggml_tensor));
232+ tensor->src [i] = tensor_src; // overwrite op->src[i] pointer with the actual location of the src tensor
233+ }
234+
235+ return tensor;
236+ }
0 commit comments