
Commit a4ca0c3

Merge branch 'master' of https://github.com/ggml-org/llama.cpp
2 parents 8dc66b9 + 06c2b15 commit a4ca0c3

40 files changed: +1268 -256 lines changed

.gitignore

Lines changed: 2 additions & 0 deletions

@@ -45,6 +45,8 @@ lcov-report/
 tags
 .build/
 build*
+release
+debug
 !build-info.cmake
 !build-info.cpp.in
 !build-info.sh

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@

 _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_

-- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code
+- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` (from clang-tools v15+) to format the added code
 - For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
 - Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
 - Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggml-org/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
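
For illustration, a minimal sketch of the convention above (the shapes and the `ctx` context are hypothetical, not taken from this commit):

```c
// ne0 = columns, ne1 = rows: A is 64 rows x 4096 columns, B is 32 rows x 4096 columns.
struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 64);
struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32);

// ggml_mul_mat requires the shared inner dimension in ne[0] (4096 here).
// The result has ne[0] = 64 and ne[1] = 32, i.e. C = B A^T (32 x 64 in conventional notation).
struct ggml_tensor * C = ggml_mul_mat(ctx, A, B);
```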

convert_hf_to_gguf.py

Lines changed: 8 additions & 3 deletions

@@ -699,6 +699,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "b3f499bb4255f8ca19fccd664443283318f2fd2414d5e0b040fbdd0cc195d6c5":
             # ref: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
             res = "deepseek-r1-qwen"
+        if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
+            # ref: https://huggingface.co/Xenova/gpt-4o
+            res = "gpt-4o"

         if res is None:
             logger.warning("\n")
@@ -2535,7 +2538,8 @@ def set_gguf_parameters(self):
         rms_eps = self.find_hparam(["rms_norm_eps"])
         max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
         orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
-        rope_dims = n_embd // n_head
+        rot_pct = self.hparams.get("partial_rotary_factor", 1.0)
+        rope_dims = int(rot_pct * n_embd) // n_head

         self.gguf_writer.add_context_length(max_pos_embds)
         self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
@@ -2559,7 +2563,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         n_head = self.find_hparam(["num_attention_heads", "n_head"])
         max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
         orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
-        rope_dims = n_embd // n_head
+        rot_pct = self.hparams.get("partial_rotary_factor", 1.0)
+        rope_dims = int(rot_pct * n_embd) // n_head

         # write rope scaling for long context (128k) model
         rope_scaling = self.find_hparam(['rope_scaling'], True)
@@ -2588,7 +2593,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
             raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor')

         if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
-            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}. long_factors = {len(long_factors)}, short_factors = {len(short_factors)}.')

         yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32))
         yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))
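
As a worked example with hypothetical hyperparameters: for `n_embd = 3072`, `num_attention_heads = 32` and `partial_rotary_factor = 0.5`, the new code gives `rope_dims = int(0.5 * 3072) // 32 = 48`, whereas the previous `n_embd // n_head` would have produced 96; models that do not define `partial_rotary_factor` keep the default of 1.0 and are unaffected.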

convert_hf_to_gguf_update.py

Lines changed: 5 additions & 0 deletions

@@ -109,6 +109,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
     {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
+    {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
 ]


@@ -131,6 +132,10 @@ def download_model(model):

     files = ["config.json", "tokenizer.json", "tokenizer_config.json"]

+    if name == "gpt-4o":
+        # Xenova/gpt-4o is tokenizer-only, it does not contain config.json
+        files = ["tokenizer.json", "tokenizer_config.json"]
+
     if tokt == TOKENIZER_TYPE.SPM:
         files.append("tokenizer.model")

examples/llava/README-granitevision.md

Lines changed: 34 additions & 27 deletions

@@ -3,8 +3,8 @@
 Download the model and point your `GRANITE_MODEL` environment variable to the path.

 ```bash
-$ git clone https://huggingface.co/ibm-granite/granite-vision-3.1-2b-preview
-$ export GRANITE_MODEL=./granite-vision-3.1-2b-preview
+$ git clone https://huggingface.co/ibm-granite/granite-vision-3.2-2b
+$ export GRANITE_MODEL=./granite-vision-3.2-2b
 ```


@@ -41,17 +41,26 @@ If you actually inspect the `.keys()` of the loaded tensors, you should see a lo


 ### 2. Creating the Visual Component GGUF
-To create the GGUF for the visual components, we need to write a config for the visual encoder; make sure the config contains the correct `image_grid_pinpoints`
+Next, create a new directory to hold the visual components, and copy the llava.clip/projector files, as shown below.

+```bash
+$ ENCODER_PATH=$PWD/visual_encoder
+$ mkdir $ENCODER_PATH
+
+$ cp $GRANITE_MODEL/llava.clip $ENCODER_PATH/pytorch_model.bin
+$ cp $GRANITE_MODEL/llava.projector $ENCODER_PATH/
+```
+
+Now, we need to write a config for the visual encoder. In order to convert the model, be sure to use the correct `image_grid_pinpoints`, as these may vary based on the model. You can find the `image_grid_pinpoints` in `$GRANITE_MODEL/config.json`.

-Note: we refer to this file as `$VISION_CONFIG` later on.
 ```json
 {
     "_name_or_path": "siglip-model",
     "architectures": [
         "SiglipVisionModel"
     ],
     "image_grid_pinpoints": [
+        [384,384],
         [384,768],
         [384,1152],
         [384,1536],
@@ -94,42 +103,32 @@ Note: we refer to this file as `$VISION_CONFIG` later on.
 }
 ```

-Create a new directory to hold the visual components, and copy the llava.clip/projector files, as well as the vision config into it.
-
-```bash
-$ ENCODER_PATH=$PWD/visual_encoder
-$ mkdir $ENCODER_PATH
-
-$ cp $GRANITE_MODEL/llava.clip $ENCODER_PATH/pytorch_model.bin
-$ cp $GRANITE_MODEL/llava.projector $ENCODER_PATH/
-$ cp $VISION_CONFIG $ENCODER_PATH/config.json
-```
-
-At which point you should have something like this:
+At this point you should have something like this:
 ```bash
 $ ls $ENCODER_PATH
 config.json llava.projector pytorch_model.bin
 ```

-Now convert the components to GGUF; Note that we also override the image mean/std dev to `[.5,.5,.5]` since we use the siglip visual encoder - in the transformers model, you can find these numbers in the [preprocessor_config.json](https://huggingface.co/ibm-granite/granite-vision-3.1-2b-preview/blob/main/preprocessor_config.json).
+Now convert the components to GGUF; Note that we also override the image mean/std dev to `[.5,.5,.5]` since we use the SigLIP visual encoder - in the transformers model, you can find these numbers in the `preprocessor_config.json`.
 ```bash
 $ python convert_image_encoder_to_gguf.py \
     -m $ENCODER_PATH \
     --llava-projector $ENCODER_PATH/llava.projector \
     --output-dir $ENCODER_PATH \
     --clip-model-is-vision \
     --clip-model-is-siglip \
-    --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5
+    --image-mean 0.5 0.5 0.5 \
+    --image-std 0.5 0.5 0.5
 ```

-this will create the first GGUF file at `$ENCODER_PATH/mmproj-model-f16.gguf`; we will refer to the abs path of this file as the `$VISUAL_GGUF_PATH.`
+This will create the first GGUF file at `$ENCODER_PATH/mmproj-model-f16.gguf`; we will refer to the absolute path of this file as the `$VISUAL_GGUF_PATH.`


 ### 3. Creating the LLM GGUF.
 The granite vision model contains a granite LLM as its language model. For now, the easiest way to get the GGUF for LLM is by loading the composite model in `transformers` and exporting the LLM so that it can be directly converted with the normal conversion path.

 First, set the `LLM_EXPORT_PATH` to the path to export the `transformers` LLM to.
-```
+```bash
 $ export LLM_EXPORT_PATH=$PWD/granite_vision_llm
 ```

@@ -142,7 +141,7 @@ if not MODEL_PATH:
     raise ValueError("env var GRANITE_MODEL is unset!")

 LLM_EXPORT_PATH = os.getenv("LLM_EXPORT_PATH")
-if not MODEL_PATH:
+if not LLM_EXPORT_PATH:
     raise ValueError("env var LLM_EXPORT_PATH is unset!")

 tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_PATH)
@@ -166,18 +165,26 @@ $ python convert_hf_to_gguf.py --outfile $LLM_GGUF_PATH $LLM_EXPORT_PATH
 ```


-### 4. Running the Model in Llama cpp
-Build llama cpp normally; you should have a target binary named `llama-llava-cli`, which you can pass two binaries to. Sample usage:
+### 4. Quantization
+If you want to quantize the LLM, you can do so with `llama-quantize` as you would any other LLM. For example:
+```bash
+$ ./build/bin/llama-quantize $LLM_EXPORT_PATH/granite_llm.gguf $LLM_EXPORT_PATH/granite_llm_q4_k_m.gguf Q4_K_M
+$ LLM_GGUF_PATH=$LLM_EXPORT_PATH/granite_llm_q4_k_m.gguf
+```
+
+Note that currently you cannot quantize the visual encoder because granite vision models use SigLIP as the visual encoder, which has tensor dimensions that are not divisible by 32.
+

-Note - the test image shown below can be found [here](https://github-production-user-asset-6210df.s3.amazonaws.com/10740300/415512792-d90d5562-8844-4f34-a0a5-77f62d5a58b5.jpg?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20250221%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250221T054145Z&X-Amz-Expires=300&X-Amz-Signature=86c60be490aa49ef7d53f25d6c973580a8273904fed11ed2453d0a38240ee40a&X-Amz-SignedHeaders=host).
+### 5. Running the Model in Llama cpp
+Build llama cpp normally; you should have a target binary named `llama-llava-cli`, which you can pass two binaries to. As an example, we pass the llama.cpp banner.

 ```bash
 $ ./build/bin/llama-llava-cli -m $LLM_GGUF_PATH \
     --mmproj $VISUAL_GGUF_PATH \
-    --image cherry_blossom.jpg \
+    --image ./media/llama0-banner.png \
     -c 16384 \
-    -p "<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n<|user|>\n\<image>\nWhat type of flowers are in this picture?\n<|assistant|>\n" \
+    -p "<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n<|user|>\n\<image>\nWhat does the text in this image say?\n<|assistant|>\n" \
     --temp 0
 ```

-Sample response: `The flowers in the picture are cherry blossoms, which are known for their delicate pink petals and are often associated with the beauty of spring.`
+Sample output: `The text in the image reads "LLAMA C++ Can it run DOOM Llama?"`

ggml/include/ggml-alloc.h

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@ struct ggml_tallocr {
 };

 GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
-GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
+GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);

 // Graph allocator
 /*
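
With the new return type, callers can check whether the allocation (including the buffer's optional `init_tensor`) succeeded. A minimal sketch of a call site, assuming `buffer` and `tensor` already exist (both hypothetical here):

```c
#include <stdbool.h>
#include <stdio.h>

#include "ggml-alloc.h"
#include "ggml-backend.h"

// Hypothetical helper: allocate `tensor` inside `buffer` and report failure to the caller.
static bool alloc_or_report(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    struct ggml_tallocr talloc = ggml_tallocr_new(buffer);

    enum ggml_status status = ggml_tallocr_alloc(&talloc, tensor);
    if (status != GGML_STATUS_SUCCESS) {
        // the tensor was not initialized and must not be used
        fprintf(stderr, "ggml_tallocr_alloc failed with status %d\n", (int) status);
        return false;
    }
    return true;
}
```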

ggml/include/ggml-backend.h

Lines changed: 3 additions & 3 deletions

@@ -56,7 +56,7 @@ extern "C" {
     GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
     GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
     GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
-    GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
     GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
     GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
     GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@@ -342,8 +342,8 @@ extern "C" {
     GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);

     // Tensor initialization
-    GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
-    GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
+    GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+    GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);

     // CPU buffer types are always available
     GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);

ggml/src/ggml-alloc.c

Lines changed: 24 additions & 15 deletions

@@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
     return talloc;
 }

-void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
+enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
     size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
     size = GGML_PAD(size, talloc->alignment);

@@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso

     assert(((uintptr_t)addr % talloc->alignment) == 0);

-    ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
+    return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
 }

 // dynamic tensor allocator
@@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {

 // utils

+static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
+    for (size_t i = 0; i < *n_buffers; i++) {
+        ggml_backend_buffer_free((*buffers)[i]);
+    }
+    free(*buffers);
+}
+
 static bool alloc_tensor_range(struct ggml_context * ctx,
         struct ggml_tensor * first, struct ggml_tensor * last,
         ggml_backend_buffer_type_t buft, size_t size,
         ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
+
     ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
     if (buffer == NULL) {
-#ifndef NDEBUG
-        GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
-#endif
-        for (size_t i = 0; i < *n_buffers; i++) {
-            ggml_backend_buffer_free((*buffers)[i]);
-        }
-        free(*buffers);
+        GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
+        free_buffers(buffers, n_buffers);
         return false;
     }

+    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
+    (*buffers)[(*n_buffers)++] = buffer;
+
     struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);

     for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
+        enum ggml_status status = GGML_STATUS_SUCCESS;
         if (t->data == NULL) {
             if (t->view_src == NULL) {
-                ggml_tallocr_alloc(&tallocr, t);
+                status = ggml_tallocr_alloc(&tallocr, t);
             } else if (t->buffer == NULL) {
-                ggml_backend_view_init(t);
+                status = ggml_backend_view_init(t);
             }
         } else {
             if (t->view_src != NULL && t->buffer == NULL) {
                 // view of a pre-allocated tensor
-                ggml_backend_view_init(t);
+                status = ggml_backend_view_init(t);
             }
         }
+        if (status != GGML_STATUS_SUCCESS) {
+            GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
+            free_buffers(buffers, n_buffers);
+            return false;
+        }
     }

-    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
-    (*buffers)[(*n_buffers)++] = buffer;
-
     return true;
 }

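Design note: the buffer is now appended to `*buffers` before the tensors are initialized, so that a failed `ggml_tallocr_alloc` or `ggml_backend_view_init` can release it through the same `free_buffers` helper used when the buffer allocation itself fails.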
ggml/src/ggml-backend-impl.h

Lines changed: 1 addition & 1 deletion

@@ -44,7 +44,7 @@ extern "C" {
         // base address of the buffer
         void * (*get_base) (ggml_backend_buffer_t buffer);
         // (optional) initialize a tensor in the buffer (eg. add tensor extras)
-        void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+        enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
         // tensor data access
         void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
         void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
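
For backend implementers, the optional `init_tensor` callback now reports an outcome instead of returning `void`. A hypothetical callback under the new interface might look like this (the `example_*` names and the per-tensor extra struct are illustrative, not part of this commit):

```c
#include <stdlib.h>

// Illustrative per-tensor extra data; not a real ggml structure.
struct example_tensor_extra {
    void * device_ptr;
};

// Hypothetical backend callback matching the new init_tensor signature.
static enum ggml_status ggml_backend_example_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    (void) buffer; // not needed in this sketch

    struct example_tensor_extra * extra = calloc(1, sizeof(*extra));
    if (extra == NULL) {
        return GGML_STATUS_ALLOC_FAILED; // propagate the failure instead of asserting
    }
    tensor->extra = extra;
    return GGML_STATUS_SUCCESS;
}
```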

ggml/src/ggml-backend.cpp

Lines changed: 9 additions & 8 deletions

@@ -126,11 +126,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
     return base;
 }

-void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     // init_tensor is optional
     if (buffer->iface.init_tensor) {
-        buffer->iface.init_tensor(buffer, tensor);
+        return buffer->iface.init_tensor(buffer, tensor);
     }
+    return GGML_STATUS_SUCCESS;
 }

 void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
@@ -1641,18 +1642,18 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,

 // utils

-void ggml_backend_view_init(struct ggml_tensor * tensor) {
+enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
     GGML_ASSERT(tensor->buffer == NULL);
     GGML_ASSERT(tensor->view_src != NULL);
     GGML_ASSERT(tensor->view_src->buffer != NULL);
     GGML_ASSERT(tensor->view_src->data != NULL);

     tensor->buffer = tensor->view_src->buffer;
     tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
-    ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
+    return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
 }

-void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
+enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
     GGML_ASSERT(tensor->buffer == NULL);
     GGML_ASSERT(tensor->data == NULL);
     GGML_ASSERT(tensor->view_src == NULL);
@@ -1662,7 +1663,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor

     tensor->buffer = buffer;
     tensor->data = addr;
-    ggml_backend_buffer_init_tensor(buffer, tensor);
+    return ggml_backend_buffer_init_tensor(buffer, tensor);
 }

 static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
@@ -1708,7 +1709,8 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
     struct ggml_tensor * dst = node_copies[id];
     if (dst->view_src != NULL) {
         graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
-        ggml_backend_view_init(dst);
+        enum ggml_status status = ggml_backend_view_init(dst);
+        GGML_ASSERT(status == GGML_STATUS_SUCCESS);
     }
     else {
         ggml_backend_tensor_copy(src, dst);
@@ -1823,7 +1825,6 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
     assert(g1->n_nodes == g2->n_nodes);

     for (int i = 0; i < g1->n_nodes; i++) {
-        //printf("eval %d/%d\n", i, g1->n_nodes);
         struct ggml_tensor * t1 = g1->nodes[i];
         struct ggml_tensor * t2 = g2->nodes[i];
