Merged

36 commits
21e2d58
draft load_gguf refactor
Isotr0py Oct 24, 2024
0e381fa
Merge branch 'main' into gguf-tensor-map
Isotr0py Dec 4, 2024
f4e2f1f
update
Isotr0py Dec 4, 2024
8a4521d
remove llama mapping
Isotr0py Dec 4, 2024
490572e
remove qwen2 mapping
Isotr0py Dec 4, 2024
ad06fde
remove unused function
Isotr0py Dec 4, 2024
2fd5c4f
deprecate stablelm mapping
Isotr0py Dec 5, 2024
ccd2972
deprecate phi3 mapping
Isotr0py Dec 5, 2024
0ac0980
Merge branch 'main' into gguf-tensor-map
Isotr0py Dec 5, 2024
0762572
deprecate t5 mapping
Isotr0py Dec 8, 2024
5895d6e
deprecate bloom mapping
Isotr0py Dec 8, 2024
08ef6a9
Merge branch 'huggingface:main' into gguf-tensor-map
Isotr0py Dec 8, 2024
967bab2
fix bloom
Isotr0py Dec 8, 2024
d4feeff
deprecate starcoder2 mapping
Isotr0py Dec 8, 2024
f5f2113
deprecate gpt2 mapping
Isotr0py Dec 8, 2024
3c9d7a1
deprecate mistral mapping
Isotr0py Dec 8, 2024
ef476b8
deprecate nemotron mapping
Isotr0py Dec 8, 2024
9cebfcc
deprecate mamba mapping
Isotr0py Dec 8, 2024
4ca22a4
deprecate mamba mapping
Isotr0py Dec 8, 2024
2f24591
code format
Isotr0py Dec 8, 2024
5f7c852
code format
Isotr0py Dec 8, 2024
424f9f0
fix mamba
Isotr0py Dec 9, 2024
83fc1d4
fix qwen2moe
Isotr0py Dec 9, 2024
cca300e
remove qwen2moe mapping
Isotr0py Dec 9, 2024
87438a6
clean up
Isotr0py Dec 10, 2024
5641fb5
remove falcon 7b map
Isotr0py Dec 10, 2024
07dd41a
remove all ggml tensors mapping
Isotr0py Dec 10, 2024
809790e
Merge branch 'main' into gguf-tensor-map
Isotr0py Dec 10, 2024
d8b613f
add comments
Isotr0py Dec 10, 2024
93c6248
update messages
Isotr0py Dec 10, 2024
acec7bd
Merge branch 'main' into gguf-tensor-map
Isotr0py Dec 10, 2024
240dff4
fix tensors in parsed parameters
Isotr0py Dec 16, 2024
f028a65
Merge branch 'main' into gguf-tensor-map
Isotr0py Dec 16, 2024
b43e1ee
add gguf check
Isotr0py Dec 16, 2024
bb9b204
Merge branch 'main' into gguf-tensor-map
Isotr0py Jan 2, 2025
a832783
Merge branch 'main' into gguf-tensor-map
Isotr0py Jan 3, 2025
2 changes: 0 additions & 2 deletions src/transformers/integrations/__init__.py
@@ -57,7 +57,6 @@
"fsdp": ["is_fsdp_managed_module"],
"ggml": [
"GGUF_CONFIG_MAPPING",
"GGUF_TENSOR_MAPPING",
"GGUF_TOKENIZER_MAPPING",
"_gguf_parse_value",
"load_dequant_gguf_tensor",
@@ -161,7 +160,6 @@
from .fsdp import is_fsdp_managed_module
from .ggml import (
GGUF_CONFIG_MAPPING,
GGUF_TENSOR_MAPPING,
GGUF_TOKENIZER_MAPPING,
_gguf_parse_value,
load_dequant_gguf_tensor,
232 changes: 0 additions & 232 deletions src/transformers/integrations/ggml.py
@@ -33,238 +33,6 @@
logger = logging.get_logger(__name__)


GGUF_TENSOR_MAPPING = {
"llama": {
"token_embd": "model.embed_tokens",
"blk": "model.layers",
"ffn_up": "mlp.up_proj",
"ffn_down": "mlp.down_proj",
"ffn_gate": "mlp.gate_proj",
"ffn_norm": "post_attention_layernorm",
"attn_norm": "input_layernorm",
"attn_q": "self_attn.q_proj",
"attn_v": "self_attn.v_proj",
"attn_k": "self_attn.k_proj",
"attn_output": "self_attn.o_proj",
"output.weight": "lm_head.weight",
"output_norm": "model.norm",
},
"mistral": {
"token_embd": "model.embed_tokens",
"blk": "model.layers",
"ffn_up": "mlp.up_proj",
"ffn_down": "mlp.down_proj",
"ffn_gate": "mlp.gate_proj",
"ffn_norm": "post_attention_layernorm",
"attn_norm": "input_layernorm",
"attn_q": "self_attn.q_proj",
"attn_v": "self_attn.v_proj",
"attn_k": "self_attn.k_proj",
"attn_output": "self_attn.o_proj",
"output.weight": "lm_head.weight",
"output_norm": "model.norm",
},
"qwen2": {
"token_embd": "model.embed_tokens",
"blk": "model.layers",
"ffn_up": "mlp.up_proj",
"ffn_down": "mlp.down_proj",
"ffn_gate": "mlp.gate_proj",
"ffn_norm": "post_attention_layernorm",
"attn_norm": "input_layernorm",
"attn_q": "self_attn.q_proj",
"attn_v": "self_attn.v_proj",
"attn_k": "self_attn.k_proj",
"attn_output": "self_attn.o_proj",
"output.weight": "lm_head.weight",
"output_norm": "model.norm",
},
"qwen2moe": {
"token_embd": "model.embed_tokens",
"blk": "model.layers",
"ffn_up_exps": "mlp.experts",
"ffn_up_shexp": "mlp.shared_expert.up_proj",
"ffn_down_exps": "mlp.experts",
"ffn_down_shexp": "mlp.shared_expert.down_proj",
"ffn_norm": "post_attention_layernorm",
"ffn_gate_inp.weight": "mlp.gate.weight",
"ffn_gate_exps": "mlp.experts",
"ffn_gate_shexp": "mlp.shared_expert.gate_proj",
"ffn_gate_inp_shexp": "mlp.shared_expert_gate",
"attn_norm": "input_layernorm",
"attn_q": "self_attn.q_proj",
"attn_v": "self_attn.v_proj",
"attn_k": "self_attn.k_proj",
"attn_output": "self_attn.o_proj",
"output.weight": "lm_head.weight",
"output_norm": "model.norm",
},
"phi3": {
"token_embd": "model.embed_tokens",
"blk": "model.layers",
"ffn_up": "mlp.gate_up_proj",
"ffn_down": "mlp.down_proj",
"ffn_gate": "mlp.gate_up_proj",
"ffn_norm": "post_attention_layernorm",
"attn_norm": "input_layernorm",
"attn_qkv": "self_attn.qkv_proj",
"attn_output": "self_attn.o_proj",
"output.weight": "lm_head.weight",
"output_norm": "model.norm",
},
"bloom": {
"token_embd.weight": "transformer.word_embeddings.weight",
"token_embd_norm": "transformer.word_embeddings_layernorm",
"blk": "transformer.h",
"ffn_up": "mlp.dense_h_to_4h",
"ffn_down": "mlp.dense_4h_to_h",
"ffn_norm": "post_attention_layernorm",
"attn_norm": "input_layernorm",
"attn_qkv": "self_attention.query_key_value",
"attn_output": "self_attention.dense",
"output.weight": "lm_head.weight",
"output_norm": "transformer.ln_f",
},
"falcon7b": {
"token_embd": "word_embeddings",
"blk": "h",
"ffn_up": "mlp.dense_h_to_4h",
"ffn_down": "mlp.dense_4h_to_h",
"attn_norm": "input_layernorm",
"attn_qkv": "self_attention.query_key_value",
"attn_output": "self_attention.dense",
".output.": ".lm_head.",
"output_norm": "ln_f",
},
"falcon40b": {
"token_embd": "word_embeddings",
"blk": "h",
"ffn_up": "mlp.dense_h_to_4h",
"ffn_down": "mlp.dense_4h_to_h",
".attn_norm.": ".ln_mlp.",
"attn_norm_2": "ln_attn",
"attn_qkv": "self_attention.query_key_value",
"attn_output": "self_attention.dense",
".output.": ".lm_head.",
"output_norm": "ln_f",
},
"t5": {
"token_embd": "shared",
"dec.blk.{bid}.attn_q": "decoder.block.{bid}.layer.0.SelfAttention.q",
"dec.blk.{bid}.attn_k": "decoder.block.{bid}.layer.0.SelfAttention.k",
"dec.blk.{bid}.attn_v": "decoder.block.{bid}.layer.0.SelfAttention.v",
"dec.blk.{bid}.attn_o": "decoder.block.{bid}.layer.0.SelfAttention.o",
"dec.blk.{bid}.attn_rel_b": "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",
"dec.blk.{bid}.attn_norm": "decoder.block.{bid}.layer.0.layer_norm",
"dec.blk.{bid}.cross_attn_q": "decoder.block.{bid}.layer.1.EncDecAttention.q",
"dec.blk.{bid}.cross_attn_k": "decoder.block.{bid}.layer.1.EncDecAttention.k",
"dec.blk.{bid}.cross_attn_v": "decoder.block.{bid}.layer.1.EncDecAttention.v",
"dec.blk.{bid}.cross_attn_o": "decoder.block.{bid}.layer.1.EncDecAttention.o",
"dec.blk.{bid}.cross_attn_norm": "decoder.block.{bid}.layer.1.layer_norm",
"dec.blk.{bid}.ffn_gate": "decoder.block.{bid}.layer.2.DenseReluDense.wi_0",
"dec.blk.{bid}.ffn_up": "decoder.block.{bid}.layer.2.DenseReluDense.wi_1",
"dec.blk.{bid}.ffn_down": "decoder.block.{bid}.layer.2.DenseReluDense.wo",
"dec.blk.{bid}.ffn_norm": "decoder.block.{bid}.layer.2.layer_norm",
"dec.output_norm": "decoder.final_layer_norm",
"enc.blk.{bid}.attn_q": "encoder.block.{bid}.layer.0.SelfAttention.q",
"enc.blk.{bid}.attn_k": "encoder.block.{bid}.layer.0.SelfAttention.k",
"enc.blk.{bid}.attn_v": "encoder.block.{bid}.layer.0.SelfAttention.v",
"enc.blk.{bid}.attn_o": "encoder.block.{bid}.layer.0.SelfAttention.o",
"enc.blk.{bid}.attn_rel_b": "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",
"enc.blk.{bid}.attn_norm": "encoder.block.{bid}.layer.0.layer_norm",
"enc.blk.{bid}.ffn_gate": "encoder.block.{bid}.layer.1.DenseReluDense.wi_0",
"enc.blk.{bid}.ffn_up": "encoder.block.{bid}.layer.1.DenseReluDense.wi_1",
"enc.blk.{bid}.ffn_down": "encoder.block.{bid}.layer.1.DenseReluDense.wo",
"enc.blk.{bid}.ffn_norm": "encoder.block.{bid}.layer.1.layer_norm",
"enc.output_norm": "encoder.final_layer_norm",
"output.weight": "lm_head.weight",
},
"t5encoder": {
"token_embd": "shared",
"enc.blk.{bid}.attn_q": "encoder.block.{bid}.layer.0.SelfAttention.q",
"enc.blk.{bid}.attn_k": "encoder.block.{bid}.layer.0.SelfAttention.k",
"enc.blk.{bid}.attn_v": "encoder.block.{bid}.layer.0.SelfAttention.v",
"enc.blk.{bid}.attn_o": "encoder.block.{bid}.layer.0.SelfAttention.o",
"enc.blk.{bid}.attn_rel_b": "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",
"enc.blk.{bid}.attn_norm": "encoder.block.{bid}.layer.0.layer_norm",
"enc.blk.{bid}.ffn_gate": "encoder.block.{bid}.layer.1.DenseReluDense.wi_0",
"enc.blk.{bid}.ffn_up": "encoder.block.{bid}.layer.1.DenseReluDense.wi_1",
"enc.blk.{bid}.ffn_down": "encoder.block.{bid}.layer.1.DenseReluDense.wo",
"enc.blk.{bid}.ffn_norm": "encoder.block.{bid}.layer.1.layer_norm",
"enc.output_norm": "encoder.final_layer_norm",
},
"stablelm": {
"token_embd": "model.embed_tokens",
"blk": "model.layers",
"ffn_up": "mlp.up_proj",
"ffn_down": "mlp.down_proj",
"ffn_gate": "mlp.gate_proj",
"ffn_norm": "post_attention_layernorm",
"attn_norm": "input_layernorm",
"attn_q": "self_attn.q_proj",
"attn_v": "self_attn.v_proj",
"attn_k": "self_attn.k_proj",
"attn_output": "self_attn.o_proj",
"output.weight": "lm_head.weight",
"output_norm": "model.norm",
},
"gpt2": {
"token_embd": "transformer.wte",
"blk": "transformer.h",
"position_embd": "transformer.wpe",
"output_norm": "transformer.ln_f",
"attn_norm": "ln_1",
"attn_qkv": "attn.c_attn",
"attn_output.weight": "attn.c_proj.weight",
"attn_output.bias": "attn.c_proj.bias",
"ffn_norm": "ln_2",
"ffn_up": "mlp.c_fc",
"ffn_down": "mlp.c_proj",
},
"starcoder2": {
"token_embd": "model.embed_tokens",
"blk": "model.layers",
"ffn_up": "mlp.c_fc",
"ffn_down": "mlp.c_proj",
"ffn_norm": "post_attention_layernorm",
"attn_norm": "input_layernorm",
"attn_q": "self_attn.q_proj",
"attn_v": "self_attn.v_proj",
"attn_k": "self_attn.k_proj",
"attn_output": "self_attn.o_proj",
"output.weight": "lm_head.weight",
"output_norm": "model.norm",
},
"mamba": {
"token_embd": "backbone.embeddings",
"blk": "backbone.layers",
"ssm_a": "mixer.A_log",
"ssm_conv1d": "mixer.conv1d",
"ssm_in": "mixer.in_proj",
"ssm_out": "mixer.out_proj",
"ssm_x": "mixer.x_proj",
"ssm_dt": "mixer.dt_proj",
"attn_norm": "norm",
"output_norm": "backbone.norm_f",
"output.weight": "lm_head.weight",
},
"nemotron": {
"token_embd": "model.embed_tokens",
"blk": "model.layers",
"ffn_up": "mlp.up_proj",
"ffn_down": "mlp.down_proj",
"ffn_norm": "post_attention_layernorm",
"attn_norm": "input_layernorm",
"attn_q": "self_attn.q_proj",
"attn_v": "self_attn.v_proj",
"attn_k": "self_attn.k_proj",
"attn_output": "self_attn.o_proj",
"output.weight": "lm_head.weight",
"output_norm": "model.norm",
},
}


GGUF_CONFIG_MAPPING = {
"general": {
"architecture": "model_type",
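The removed GGUF_TENSOR_MAPPING entries above are flat substitution tables from GGUF tensor-name fragments to Transformers parameter-name fragments. As a minimal, illustrative sketch (not the actual transformers loading code; `LLAMA_MAPPING` and `rename_gguf_tensor` are hypothetical names used only for this example), this is how such a table translates a GGUF tensor key:

```python
# Illustrative sketch only -- not the transformers implementation.
# A trimmed-down copy of the removed GGUF_TENSOR_MAPPING["llama"] entry.
LLAMA_MAPPING = {
    "token_embd": "model.embed_tokens",
    "blk": "model.layers",
    "attn_q": "self_attn.q_proj",
    "attn_k": "self_attn.k_proj",
    "attn_v": "self_attn.v_proj",
    "attn_output": "self_attn.o_proj",
    "output_norm": "model.norm",
    "output.weight": "lm_head.weight",
}


def rename_gguf_tensor(name: str, mapping: dict) -> str:
    """Replace every GGUF name fragment found in `name` with its HF counterpart."""
    for gguf_fragment, hf_fragment in mapping.items():
        if gguf_fragment in name:
            name = name.replace(gguf_fragment, hf_fragment)
    return name


# "blk.0.attn_q.weight" -> "model.layers.0.self_attn.q_proj.weight"
print(rename_gguf_tensor("blk.0.attn_q.weight", LLAMA_MAPPING))
```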