Commit a1610bd

[ggml/llama] Align structures after scoped enums for 64-bit platforms
- llama_model_params 72 bytes -> 64 bytes
- ggml_cgraph 80 bytes -> 72 bytes
- hash_node 32 bytes -> 24 bytes
- ggml_threadpool 160 bytes -> 152 bytes
- best_tokenization 24 bytes -> 16 bytes
1 parent 55071fb commit a1610bd
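For context (my gloss, not part of the commit message): on common 64-bit ABIs an enum occupies 4 bytes while pointers and size_t occupy 8, so an enum member stranded among 8-byte members buys up to 8 bytes of alignment padding; moving it next to other 4-byte members lets the two share a single 8-byte slot. A self-contained sketch of the effect, with made-up names:

#include <cstdio>

// hypothetical miniature of the reordering; sizes assume a typical LP64
// ABI (x86-64 / aarch64), where enums are 4 bytes and pointers are 8
enum eval_order { ORDER_LTR, ORDER_RTL };

struct before_reorder {
    int        n_nodes; // 4 bytes + 4 bytes of padding to align the pointer
    void *     nodes;   // 8 bytes
    eval_order order;   // 4 bytes + 4 bytes of tail padding
};                      // sizeof == 24

struct after_reorder {
    int        n_nodes; // 4 bytes
    eval_order order;   // 4 bytes -- fills what used to be padding
    void *     nodes;   // 8 bytes
};                      // sizeof == 16

int main() {
    // prints "before=24 after=16" on the ABIs above
    std::printf("before=%zu after=%zu\n", sizeof(before_reorder), sizeof(after_reorder));
}

Each hunk below makes the same kind of move: the enum member migrates to a spot where it shares an 8-byte slot with other small members instead of forcing padding.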

File tree

10 files changed: +19 -18 lines changed


ggml/src/ggml-alloc.c

Lines changed: 1 addition & 1 deletion
@@ -339,10 +339,10 @@ static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) {
 // graph allocator
 
 struct hash_node {
+    size_t offset; // offset within the buffer
     int n_children;
     int n_views;
     int buffer_id;
-    size_t offset; // offset within the buffer
     bool allocated;
 };
 

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 2 additions & 2 deletions
@@ -451,6 +451,8 @@ struct ggml_threadpool {
     atomic_int GGML_CACHE_ALIGN n_barrier_passed;
     atomic_int GGML_CACHE_ALIGN current_chunk; // currently processing chunk during Mat_Mul, shared between all the threads.
 
+    enum ggml_status ec;
+
     // these are atomic as an annotation for thread-sanitizer
     atomic_bool stop;  // Used for stopping the threadpool altogether
     atomic_bool pause; // Used for pausing the threadpool or individual threads
@@ -462,8 +464,6 @@ struct ggml_threadpool {
 
     int32_t prio;  // Scheduling priority
     uint32_t poll; // Polling level (0 - no polling)
-
-    enum ggml_status ec;
 };
 
 // Per-thread state

ggml/src/ggml-impl.h

Lines changed: 2 additions & 2 deletions
@@ -295,14 +295,14 @@ struct ggml_cgraph {
     int n_nodes; // number of nodes currently in use
     int n_leafs; // number of leafs currently in use
 
+    enum ggml_cgraph_eval_order order;
+
     struct ggml_tensor ** nodes;     // tensors with data that can change if the graph is evaluated
     struct ggml_tensor ** grads;     // the outputs of these tensors are the gradients of the nodes
     struct ggml_tensor ** grad_accs; // accumulators for node gradients
     struct ggml_tensor ** leafs;     // tensors with constant data
 
     struct ggml_hash_set visited_hash_set;
-
-    enum ggml_cgraph_eval_order order;
 };
 
 // returns a slice of cgraph with nodes [i0, i1)

ggml/src/ggml-quants.c

Lines changed: 1 addition & 1 deletion
@@ -4996,7 +4996,7 @@ static bool validate_fp16(ggml_fp16_t f, size_t i) {
 }
 
 bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes) {
-    if (type < 0 || type >= GGML_TYPE_COUNT) {
+    if (type >= GGML_TYPE_COUNT) {
         fprintf(stderr, "%s: invalid type %d\n", __func__, type);
         return false;
     }
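An inference on this hunk (and the matching ones in ggml.c and gguf.cpp below), since the diff doesn't say: dropping the "type < 0" half of the range check only makes sense if the enum's underlying type is now unsigned, which turns that comparison into a tautology that compilers warn about. A sketch under that assumption, with a stand-in enum:

#include <cstdint>

// hypothetical stand-in for ggml_type, assuming an unsigned underlying type
enum ggml_type_sketch : uint32_t { TYPE_F32, TYPE_F16, TYPE_COUNT };

bool validate_type(ggml_type_sketch type) {
    // "type < 0" would always be false here (clang flags it with
    // -Wtautological-unsigned-enum-zero-compare), so only the upper
    // bound is worth checking
    return type < TYPE_COUNT;
}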

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,7 @@ struct rpc_tensor {
 static_assert(sizeof(rpc_tensor) % 8 == 0, "rpc_tensor size must be multiple of 8");
 
 // RPC commands
-enum rpc_cmd {
+enum rpc_cmd : uint8_t {
     RPC_CMD_ALLOC_BUFFER = 0,
     RPC_CMD_GET_ALIGNMENT,
     RPC_CMD_GET_MAX_SIZE,
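A note from me rather than the commit: beyond packing, giving rpc_cmd a fixed uint8_t underlying type pins the enum to exactly one byte on every platform, which is what you want for a value serialized into RPC messages. A minimal sketch (illustrative names only):

#include <cstdint>

// a plain enum's size is implementation-defined (commonly 4 bytes);
// a fixed underlying type makes it the same on every platform
enum cmd_wire : uint8_t {
    CMD_ALLOC = 0,
    CMD_FREE,
};

static_assert(sizeof(cmd_wire) == 1, "stable one-byte wire encoding");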

ggml/src/ggml.c

Lines changed: 3 additions & 3 deletions
@@ -1565,7 +1565,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         struct ggml_tensor * view_src,
         size_t view_offs) {
 
-    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
+    GGML_ASSERT(type < GGML_TYPE_COUNT);
     GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
 
     // find the base tensor and absolute offset
@@ -5921,12 +5921,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
         /*.size         =*/ size,
         /*.n_nodes      =*/ 0,
         /*.n_leafs      =*/ 0,
+        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
         /*.nodes        =*/ nodes_ptr,
         /*.grads        =*/ grads_ptr,
         /*.grad_accs    =*/ grad_accs_ptr,
         /*.leafs        =*/ leafs_ptr,
         /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
-        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
     };
 
     ggml_hash_set_reset(&cgraph->visited_hash_set);
@@ -5947,12 +5947,12 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
         /*.size             =*/ 0,
         /*.n_nodes          =*/ i1 - i0,
         /*.n_leafs          =*/ 0,
+        /*.order            =*/ cgraph0->order,
         /*.nodes            =*/ cgraph0->nodes + i0,
         /*.grads            =*/ NULL, // gradients would need visited_hash_set
         /*.grad_accs        =*/ NULL,
         /*.leafs            =*/ NULL,
         /*.visited_hash_set =*/ { 0, NULL, NULL },
-        /*.order            =*/ cgraph0->order,
     };
 
     return cgraph;

ggml/src/gguf.cpp

Lines changed: 1 addition & 1 deletion
@@ -546,7 +546,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     ok = ok && gr.read(info.t.type);
 
     // check that tensor type is within defined range
-    if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
+    if (info.t.type >= GGML_TYPE_COUNT) {
         fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
             __func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
         ok = false;

include/llama.h

Lines changed: 2 additions & 1 deletion
@@ -294,7 +294,6 @@ extern "C" {
         const struct llama_model_tensor_buft_override * tensor_buft_overrides;
 
         int32_t n_gpu_layers; // number of layers to store in VRAM
-        enum llama_split_mode split_mode; // how to split the model across multiple GPUs
 
         // the GPU that is used for the entire model when split_mode is LLAMA_SPLIT_MODE_NONE
         int32_t main_gpu;
@@ -313,6 +312,8 @@ extern "C" {
         // override key-value pairs of the model meta data
         const struct llama_model_kv_override * kv_overrides;
 
+        enum llama_split_mode split_mode; // how to split the model across multiple GPUs
+
         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap;   // use mmap if possible

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
@@ -13076,12 +13076,12 @@ llama_model_params llama_model_default_params() {
         /*.devices                     =*/ nullptr,
         /*.tensor_buft_overrides       =*/ nullptr,
         /*.n_gpu_layers                =*/ 0,
-        /*.split_mode                  =*/ LLAMA_SPLIT_MODE_LAYER,
         /*.main_gpu                    =*/ 0,
         /*.tensor_split                =*/ nullptr,
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
         /*.kv_overrides                =*/ nullptr,
+        /*.split_mode                  =*/ LLAMA_SPLIT_MODE_LAYER,
         /*.vocab_only                  =*/ false,
         /*.use_mmap                    =*/ true,
         /*.use_mlock                   =*/ false,

src/llama-vocab.cpp

Lines changed: 5 additions & 5 deletions
@@ -826,9 +826,9 @@ struct llm_tokenizer_ugm_session {
         }
 
         // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
-        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {0, vocab.token_unk(), -FLT_MAX});
         // at the beginning tokenization score is zero
-        tokenization_results[0] = { vocab.token_unk(), 0, 0 };
+        tokenization_results[0] = { 0, vocab.token_unk(), 0 };
 
         for (size_t input_offset = 0; input_offset < input_len;) {
             size_t prefix_offset = input_offset;
@@ -858,7 +858,7 @@ struct llm_tokenizer_ugm_session {
                     const double challenger_score = current_best.score_sum + token_score;
                     struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                     if (challenger_score > current_champ.score_sum) {
-                        struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+                        struct best_tokenization challenger = { input_offset, token_id, (float) challenger_score };
                         current_champ = challenger;
                     }
                 }
@@ -872,7 +872,7 @@ struct llm_tokenizer_ugm_session {
             prefix_offset = input_offset + n_utf8_code_units;
             struct best_tokenization & current_champ = tokenization_results[prefix_offset];
             if (challenger_score > current_champ.score_sum) {
-                struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+                struct best_tokenization challenger = { input_offset, vocab.token_unk(), (float) challenger_score };
                 current_champ = challenger;
             }
         }
@@ -996,8 +996,8 @@ struct llm_tokenizer_ugm_session {
 
     // this structure stores the best tokenization so far at input_offset
     struct best_tokenization {
-        llama_token token_id;
         size_t input_offset;
+        llama_token token_id;
         float score_sum;
     };
 
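One more gloss of mine (the diff doesn't explain the churn above): brace initialization of best_tokenization is positional, so reordering its members forces every initializer list in the callers to be rewritten in the same order, which is why the three call sites change along with the struct. A minimal sketch, with illustrative values:

#include <cstddef>
#include <cstdint>

using llama_token = int32_t;

// the reordered struct from this diff: the 8-byte size_t leads, and the two
// 4-byte members pair into one 8-byte slot (24 -> 16 bytes on LP64)
struct best_tokenization {
    size_t      input_offset;
    llama_token token_id;
    float       score_sum;
};

// aggregate initialization is positional, so the value order must track
// the new member order; the comments make the binding explicit
best_tokenization t = { /*input_offset=*/0, /*token_id=*/0, /*score_sum=*/0.0f };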
