@@ -1033,92 +1033,6 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
     return buffer;
 }
 
-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_for_weights(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
-#ifndef GGML_OPENCL_SMALL_ALLOC
-    return ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
-#else
-    // Small allocation allocates a separate buffer for each tensor. Instead of
-    // collecting multiple tensors to allocate a large buffer, each tensor is
-    // allocated a buffer immediately. This is only supposed to be used for
-    // weight tensors (note that weights can be f32).
-    GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
-
-    size_t alignment = ggml_backend_buft_get_alignment(buft);
-
-    ggml_backend_buffer_t * buffers = NULL;
-    size_t n_buffers = 0;
-
-    struct ggml_tensor * first_view = NULL;
-    struct ggml_tensor * first = ggml_get_first_tensor(ctx);
-    for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-        size_t this_size = 0;
-        if (t->data == NULL && t->view_src == NULL) {
-            // Tensor size must be properly padded.
-            this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
-        }
-
-        // The allocation logic here has grown beyond its original intention in
-        // order to make `test-backend-ops` work. The initial intention was to
-        // allocate memory for weights - each weight tensor gets its own buffer
-        // object. The original function should be used to allocate for
-        // intermediate tensors. There are usually no view tensors for weights;
-        // this is not true for intermediate tensors. However, `test-backend-ops`
-        // makes no differentiation between weight tensors and intermediate
-        // tensors, so this function is used for general allocation when small
-        // allocation is enabled in the test. This requires the function to also
-        // handle view tensors, which do not require actual allocation. In the
-        // original function, view tensors are allocated together with other
-        // non-view tensors since their sizes are 0.
-        // Here, we try to identify view tensors and allocate them with the next
-        // non-view tensor. View tensors cannot be allocated (alone) but must be
-        // initialized (together with non-view tensors).
-
-        // This is a view tensor if its size is 0. Record its location if it is
-        // the first one after a non-view tensor. If the next tensor is also a
-        // view, simply go to the next. We want to allocate all consecutive view
-        // tensors together with the next non-view tensor.
-        if (this_size == 0 && first_view == NULL) {
-            first_view = t;
-            continue;
-        }
-
-        if (first_view) {
-            // This is a non-view tensor. If there are any view tensors before
-            // this non-view tensor, we want to allocate these view tensors and
-            // this non-view tensor together.
-            // The first tensor to allocate is the first view tensor.
-            first = first_view;
-        } else {
-            // Otherwise, allocate this non-view tensor immediately.
-            first = t;
-        }
-
-        if (!alloc_tensor_range(ctx, first, ggml_get_next_tensor(ctx, t), buft, this_size, &buffers, &n_buffers)) {
-            return NULL;
-        }
-
-        // Always reset first_view after a non-view tensor.
-        first_view = NULL;
-    }
-
-    if (n_buffers == 0) {
-#ifndef NDEBUG
-        fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__);
-#endif
-        return NULL;
-    }
-
-    ggml_backend_buffer_t buffer;
-    if (n_buffers == 1) {
-        buffer = buffers[0];
-    } else {
-        buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
-    }
-    free(buffers);
-    return buffer;
-#endif
-}
-
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
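
For context, the core of the deleted function is its view-grouping pass: pad each non-view tensor's size to the buffer alignment, defer any run of zero-size view tensors, and allocate that run together with the next non-view tensor so every weight ends up in its own buffer. Below is a minimal, self-contained C sketch of that pass under stated assumptions: the toy_tensor struct, PAD macro, and alloc_small helper are hypothetical stand-ins for the real ggml context/buffer API, not part of ggml.

/*
 * Sketch of per-tensor ("small") allocation with view grouping.
 * Hypothetical types; only the grouping logic mirrors the deleted code.
 */
#include <stddef.h>
#include <stdio.h>

/* Round x up to a multiple of a (a must be a power of two). */
#define PAD(x, a) (((x) + (a) - 1) & ~((a) - 1))

struct toy_tensor {
    const char        *name;
    size_t             size;   /* 0 means this is a view tensor */
    struct toy_tensor *next;
};

/* Walk the list, emitting one "buffer" per non-view tensor; a pending
 * run of view tensors is folded into the allocation that follows it. */
static void alloc_small(struct toy_tensor *first, size_t alignment) {
    struct toy_tensor *first_view = NULL;
    for (struct toy_tensor *t = first; t != NULL; t = t->next) {
        size_t this_size = PAD(t->size, alignment);
        if (this_size == 0) {
            if (first_view == NULL) {
                first_view = t;  /* remember where the view run starts */
            }
            continue;
        }
        /* Allocate from the first deferred view (if any) through t. */
        struct toy_tensor *start = first_view ? first_view : t;
        printf("buffer of %zu bytes for [%s .. %s]\n",
               this_size, start->name, t->name);
        first_view = NULL;  /* always reset after a non-view tensor */
    }
}

int main(void) {
    struct toy_tensor c = { "w2",   4096, NULL };
    struct toy_tensor b = { "view",    0, &c   };
    struct toy_tensor a = { "w1",   1000, &b   };
    alloc_small(&a, 32);
    return 0;
}

Running the sketch prints one 1024-byte buffer for w1 alone and one 4096-byte buffer covering the view together with w2, mirroring how the deleted loop carried first_view forward and reset it after each non-view tensor.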