@@ -32,6 +32,7 @@ struct split_params {
     int n_split_tensors = 128;
     std::string input;
     std::string output;
+    bool no_tensor_first_split = false;
     bool dry_run = false;
 };
 
@@ -49,6 +50,7 @@ static void split_print_usage(const char * executable) {
     printf("  --merge                 merge multiple GGUF to a single GGUF\n");
     printf("  --split-max-tensors     max tensors in each split (default: %d)\n", default_params.n_split_tensors);
     printf("  --split-max-size N(M|G) max size per split\n");
+    printf("  --no-tensor-first-split do not add tensors to the first split (disabled by default)\n");
     printf("  --dry-run               only print out a split plan and exit, without writing any new files\n");
     printf("\n");
 }
@@ -100,6 +102,10 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
             arg_found = true;
             params.dry_run = true;
         }
+        if (arg == "--no-tensor-first-split") {
+            arg_found = true;
+            params.no_tensor_first_split = true;
+        }
 
         if (is_op_set) {
             throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
@@ -200,10 +206,10 @@ struct split_strategy {
         // because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
         int i_split = -1;
         struct gguf_context * ctx_out = NULL;
-        auto new_ctx_out = [&]() {
+        auto new_ctx_out = [&](bool allow_no_tensors) {
             i_split++;
             if (ctx_out != NULL) {
-                if (gguf_get_n_tensors(ctx_out) == 0) {
+                if (gguf_get_n_tensors(ctx_out) == 0 && !allow_no_tensors) {
                     fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
                     exit(EXIT_FAILURE);
                 }
@@ -220,7 +226,12 @@ struct split_strategy {
         };
 
         // initialize ctx_out for the first split
-        new_ctx_out();
+        new_ctx_out(false);
+
+        // skip first split if no_tensor_first_split is set
+        if (params.no_tensor_first_split) {
+            new_ctx_out(true);
+        }
 
         // process tensors one by one
         size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
@@ -230,7 +241,7 @@ struct split_strategy {
             size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
             size_t next_tensors_size = curr_tensors_size + n_bytes;
             if (should_split(i, next_tensors_size)) {
-                new_ctx_out();
+                new_ctx_out(false);
                 curr_tensors_size = n_bytes;
             } else {
                 curr_tensors_size = next_tensors_size;
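
For context on what the new lambda parameter is doing: with --no-tensor-first-split, an extra output context is opened before any tensor has been assigned, so the "split with 0 tensors" sanity check has to be suppressed for exactly that one call while staying active for every later rollover. The sketch below is a simplified, hypothetical model of that planning flow (the Split struct, plan_splits, and the tensor-count-only limit are illustrative, not from gguf-split.cpp; the real code builds gguf_context objects and can also split by size), meant only to show the ordering of the new_ctx_out-style calls.

// Simplified, hypothetical sketch of the split planning order after this change.
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

struct Split { std::vector<std::string> tensors; };

static std::vector<Split> plan_splits(const std::vector<std::string> & tensor_names,
                                      int max_tensors_per_split,
                                      bool no_tensor_first_split) {
    std::vector<Split> splits;

    // Mirrors new_ctx_out(bool allow_no_tensors): open a new split, refusing to
    // leave the previous one empty unless explicitly allowed.
    auto new_split = [&](bool allow_no_tensors) {
        if (!splits.empty() && splits.back().tensors.empty() && !allow_no_tensors) {
            fprintf(stderr, "error: a split ended up with 0 tensors\n");
            exit(EXIT_FAILURE);
        }
        splits.push_back({});
    };

    new_split(false);          // first split, as in the unmodified code path
    if (no_tensor_first_split) {
        new_split(true);       // deliberately leave the first split tensor-free
    }

    for (const auto & name : tensor_names) {
        if ((int) splits.back().tensors.size() >= max_tensors_per_split) {
            new_split(false);  // regular rollover must never produce an empty split
        }
        splits.back().tensors.push_back(name);
    }
    return splits;
}

int main() {
    const std::vector<std::string> tensor_names = { "a", "b", "c", "d", "e" };
    const auto splits = plan_splits(tensor_names, /*max_tensors_per_split=*/2,
                                    /*no_tensor_first_split=*/true);
    // Expected: split 0 holds 0 tensors (metadata only), the rest hold 2, 2, 1.
    for (size_t i = 0; i < splits.size(); i++) {
        printf("split %zu: %zu tensors\n", i, splits[i].tensors.size());
    }
    return 0;
}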