Commit 73d6db9

Changes to support codellama
Don't load the inv_freq tensor; add a rope_theta config parameter. Based on huggingface/transformers#24998 and huggingface/transformers#25740.
1 parent 7bdcb49 · commit 73d6db9

File tree

1 file changed: +7 -2 lines changed

server/text_generation_server/models/custom_modeling/flash_llama_modeling.py

Lines changed: 7 additions & 2 deletions
@@ -62,6 +62,7 @@ def __init__(
         pretraining_tp=1,
         tie_word_embeddings=False,
         rope_scaling=None,
+        rope_theta=10000.0,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -82,6 +83,7 @@ def __init__(
         self.pretraining_tp = pretraining_tp
         self.use_cache = use_cache
         self.rope_scaling = rope_scaling
+        self.rope_theta = rope_theta
 
         super().__init__(
             pad_token_id=pad_token_id,
@@ -178,8 +180,11 @@ def __init__(
         self.hidden_size = config.hidden_size
         self.head_size = self.hidden_size // self.num_heads
 
-        self.rotary_emb = PositionRotaryEmbedding.load(
-            prefix=f"{prefix}.rotary_emb", weights=weights
+        # self.rotary_emb = PositionRotaryEmbedding.load(
+        #     prefix=f"{prefix}.rotary_emb", weights=weights
+        # )
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            dim=self.head_size, base=config.rope_theta, device=weights.device
         )
 
         self.softmax_scale = self.head_size**-0.5
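
For context: the inv_freq buffer that LLaMA checkpoints sometimes serialize is fully determined by the head dimension and the RoPE base (rope_theta), so it can be recomputed from the config instead of read from the weights. That is what makes this change work for Code Llama, which uses rope_theta = 1000000 rather than LLaMA's default of 10000. Below is a minimal sketch of the standard RoPE frequency computation, illustrative only and not necessarily the exact PositionRotaryEmbedding.static internals; the head size of 128 is an assumed example value.

import torch

def rope_inv_freq(dim: int, base: float = 10000.0, device: str = "cpu") -> torch.Tensor:
    # Standard RoPE inverse frequencies: 1 / base^(2i / dim) for i in [0, dim / 2).
    # Because this depends only on `dim` and `base`, the inv_freq tensor stored in
    # some checkpoints is redundant and does not have to be loaded.
    return 1.0 / (base ** (torch.arange(0, dim, 2, device=device).float() / dim))

# Assumed example head size of 128; Code Llama's larger base (1_000_000) lengthens
# the rotation periods compared to LLaMA's default of 10_000.
inv_freq_llama = rope_inv_freq(dim=128, base=10_000.0)
inv_freq_codellama = rope_inv_freq(dim=128, base=1_000_000.0)

Exposing rope_theta on the config lets the same attention code path serve both LLaMA and Code Llama checkpoints, whether or not they ship an inv_freq tensor.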