@@ -1033,92 +1033,6 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context
     return buffer;
 }
 
-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_for_weights(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
-#ifndef GGML_OPENCL_SMALL_ALLOC
-    return ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
-#else
-    // Small allocation allocates a separate buffer for each tensor. Instead of
-    // collecting multiple tensors into one large buffer, each tensor gets its
-    // own buffer immediately. This is only supposed to be used for weight
-    // tensors (note that weights can be f32).
-    GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
-
-    size_t alignment = ggml_backend_buft_get_alignment(buft);
-
-    ggml_backend_buffer_t * buffers = NULL;
-    size_t n_buffers = 0;
-
-    struct ggml_tensor * first_view = NULL;
-    struct ggml_tensor * first = ggml_get_first_tensor(ctx);
-    for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-        size_t this_size = 0;
-        if (t->data == NULL && t->view_src == NULL) {
-            // The tensor size must be padded to the buffer alignment.
-            this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
-        }
-
-        // The allocation logic here has grown beyond the original intention in
-        // order to make `test-backend-ops` work. The initial intention was to
-        // allocate memory for weights - each weight tensor gets its own buffer
-        // object - while the original function would still be used to allocate
-        // intermediate tensors. There are usually no view tensors among
-        // weights, but that is not true for intermediate tensors. However,
-        // `test-backend-ops` makes no differentiation between weight tensors
-        // and intermediate tensors; this function is used for general
-        // allocation when small allocation is enabled in the test. It must
-        // therefore also handle view tensors, which do not require actual
-        // allocation. In the original function, view tensors are allocated
-        // together with other non-view tensors, since view tensor sizes are 0.
-        // Here, we try to identify view tensors and allocate them with the next
-        // non-view tensor. View tensors cannot be allocated (alone) but must be
-        // initialized (together with non-view tensors).
-
-        // This is a view tensor if its size is 0. Record its location if it is
-        // the first one after a non-view tensor. If the next tensor is still a
-        // view, simply go to the next one. We want to allocate all consecutive
-        // view tensors together with the next non-view tensor.
-        if (this_size == 0 && first_view == NULL) {
-            first_view = t;
-            continue;
-        }
-
-        if (first_view) {
-            // This is a non-view tensor. If there are any view tensors before
-            // this non-view tensor, we want to allocate those view tensors and
-            // this non-view tensor together.
-            // The first tensor to allocate is the first view tensor.
-            first = first_view;
-        } else {
-            // Otherwise, allocate this non-view tensor immediately.
-            first = t;
-        }
-
-        if (!alloc_tensor_range(ctx, first, ggml_get_next_tensor(ctx, t), buft, this_size, &buffers, &n_buffers)) {
-            return NULL;
-        }
-
-        // Always reset first_view after a non-view tensor.
-        first_view = NULL;
-    }
-
-    if (n_buffers == 0) {
-#ifndef NDEBUG
-        fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__);
-#endif
-        return NULL;
-    }
-
-    ggml_backend_buffer_t buffer;
-    if (n_buffers == 1) {
-        buffer = buffers[0];
-    } else {
-        buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
-    }
-    free(buffers);
-    return buffer;
-#endif
-}
-
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
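
For context, here is a minimal sketch of how a caller might have exercised the removed allocator. It is not taken from this commit: the helper function name is the one being deleted above, the tensor shapes are made up, and the behavior described in the comments assumes GGML_OPENCL_SMALL_ALLOC is defined; everything else is the public ggml API that the deleted code itself calls.

// Hypothetical usage sketch of the removed small-alloc weight allocator.
#include "ggml.h"
#include "ggml-backend.h"

static ggml_backend_buffer_t alloc_weights_example(ggml_backend_t backend) {
    // no_alloc is mandatory here: the context holds only tensor metadata,
    // and the data is allocated by the backend (see the GGML_ASSERT above).
    struct ggml_init_params params = {
        /*.mem_size   =*/ 2 * ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    // Two hypothetical weight tensors (shapes are illustrative only).
    ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 4096);
    ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4096);

    // With GGML_OPENCL_SMALL_ALLOC, each tensor would get its own buffer and
    // the result would be a multi-buffer; without it, this falls back to
    // ggml_backend_alloc_ctx_tensors_from_buft.
    return ggml_backend_alloc_ctx_tensors_from_buft_for_weights(
        ctx, ggml_backend_get_default_buffer_type(backend));
    // Note: ctx must outlive any use of the tensors; both the context and the
    // returned buffer are freed by the caller.
}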