Skip to content

Commit eb2e355

Browse files
committed
[aux] gguf tensor must be followed
Add a `--must-be-followed` flag to the tool so that the named tensors are always followed by another tensor in the same shard and are never the last tensor of a shard. This ensures the shard is not released as soon as such a tensor has been processed, and avoids missing-file failures for duplicate tensors that are re-referenced a few tensors later (typically token_embd.weight / output).
1 parent 67e1868 commit eb2e355

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

tools/gguf-split/gguf-split.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <fstream>
1414
#include <string>
1515
#include <vector>
16+
#include <set>
1617

1718
#if defined(_WIN32)
1819
#include <windows.h>
@@ -43,6 +44,7 @@ struct split_params {
4344
std::string output;
4445
bool no_tensor_first_split = false;
4546
bool dry_run = false;
47+
std::set<std::string> must_be_followed_layers;
4648
};
4749

4850
static void split_print_usage(const char * executable) {
@@ -61,6 +63,7 @@ static void split_print_usage(const char * executable) {
6163
printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors);
6264
printf(" --split-max-size N(M|G) max size per split\n");
6365
printf(" --no-tensor-first-split do not add tensors to the first split (disabled by default)\n");
66+
printf(" --must-be-followed LAYER ensure LAYER is not the last tensor in a split and will not be released when loading after any tensor is created (can be used multiple times)\n");
6467
printf(" --dry-run only print out a split plan and exit, without writing any new files\n");
6568
printf("\n");
6669
}
@@ -144,6 +147,13 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
144147
}
145148
params.mode = MODE_SIZE;
146149
params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
150+
} else if (arg == "--must-be-followed") {
151+
if (++arg_idx >= argc) {
152+
invalid_param = true;
153+
break;
154+
}
155+
arg_found = true;
156+
params.must_be_followed_layers.insert(argv[arg_idx]);
147157
}
148158

149159
if (!arg_found) {
@@ -276,7 +286,19 @@ struct split_strategy {
276286
}
277287
}
278288

289+
bool must_be_followed(int i_tensor) {
290+
if (i_tensor > 0 && i_tensor < n_tensors) {
291+
const char* tensor_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
292+
return params.must_be_followed_layers.find(tensor_name) != params.must_be_followed_layers.end();
293+
}
294+
return false;
295+
}
296+
279297
bool should_split(int i_tensor, size_t next_size) {
298+
if (must_be_followed(i_tensor) || must_be_followed(i_tensor - 1)) {
299+
return false;
300+
}
301+
280302
if (params.mode == MODE_SIZE) {
281303
// split by max size per file
282304
return next_size > params.n_bytes_split;

0 commit comments

Comments
 (0)