@@ -497,8 +497,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         hparams.n_embd_head_v = 0;
     }

+    // for differentiating model types
     uint32_t n_vocab = 0;
-
     ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);

     // arch-specific KVs
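The vocab-size read in the hunk above leans on short-circuit evaluation: ml.get_key() returns false when LLM_KV_VOCAB_SIZE is absent, so ml.get_arr_n() then falls back to the length of the tokenizer token list. A minimal, self-contained sketch of the same idiom follows; the two reader functions are hypothetical stand-ins for the llama_model_loader calls, not its real API.

#include <cstdint>
#include <cstdio>
#include <optional>

// Hypothetical stand-in for ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false):
// returns false when the key is missing so the caller can fall back.
static bool read_vocab_size_kv(const std::optional<uint32_t> & kv, uint32_t & out) {
    if (!kv) return false;
    out = *kv;
    return true;
}

// Hypothetical stand-in for ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false):
// reports the number of entries in the tokenizer token list, if present.
static bool read_tokenizer_list_len(const std::optional<uint32_t> & len, uint32_t & out) {
    if (!len) return false;
    out = *len;
    return true;
}

int main() {
    uint32_t n_vocab = 0;

    std::optional<uint32_t> vocab_size_kv;              // key absent in this (made-up) GGUF
    std::optional<uint32_t> tokenizer_list_len = 32000; // tokenizer list has 32000 entries

    // same pattern as the diff: the second read only runs if the first returns false
    read_vocab_size_kv(vocab_size_kv, n_vocab) || read_tokenizer_list_len(tokenizer_list_len, n_vocab);

    printf("n_vocab = %u\n", (unsigned) n_vocab); // prints 32000
    return 0;
}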
@@ -622,7 +622,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
-                ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);

                 switch (hparams.n_layer) {
@@ -645,7 +644,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
-                ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
                 hparams.f_max_alibi_bias = 8.0f;

@@ -659,7 +657,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
-                ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);

                 if (hparams.n_layer == 12 && hparams.n_embd == 768) {
@@ -1367,7 +1364,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     const int64_t n_ff          = hparams.n_ff();
     const int64_t n_embd_gqa    = n_embd_v_gqa;
     const int64_t n_vocab       = vocab.n_vocab();
-    const int64_t n_vocab_type  = hparams.n_vocab_type;
+    const int64_t n_token_types = vocab.n_token_types();
     const int64_t n_rot         = hparams.n_rot;
     const int64_t n_expert      = hparams.n_expert;
     const int64_t n_expert_used = hparams.n_expert_used;
@@ -1812,7 +1809,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         case LLM_ARCH_NOMIC_BERT:
             {
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
-                type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}, 0);
+                type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, 0);

                 if (arch == LLM_ARCH_BERT) {
                     pos_embd = create_tensor(tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, n_ctx_train}, 0);
@@ -1866,7 +1863,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         case LLM_ARCH_JINA_BERT_V2:
             {
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); // word_embeddings
-                type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}, 0); // token_type_embeddings
+                type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, 0); // token_type_embeddings

                 tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0); // LayerNorm
                 tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}, 0); // LayerNorm bias
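The tensor-shape hunks above size the token-type embedding from a count owned by the vocabulary object instead of hparams.n_vocab_type. Below is a minimal sketch of what such an accessor could look like, under the assumption that the count is captured while the tokenizer metadata is parsed; the struct and member names are illustrative, not the ones in the tree.

#include <cstdint>

// Sketch only: the token-type count lives next to the tokenizer data it
// describes rather than in llama_hparams. Names are assumptions.
struct vocab_sketch {
    uint32_t n_types = 0; // assumed to be filled from the tokenizer metadata during vocab load

    // accessor used when sizing the token-type embedding tensor,
    // e.g. create_tensor(..., {n_embd, n_token_types}, 0)
    uint32_t n_token_types() const {
        return n_types;
    }
};

Keeping the count beside the tokenizer data it describes is also why the BERT-style load_hparams branches no longer read LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT into hparams: the value is consumed once on the vocab side rather than once per architecture.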