
Commit 5a9b13e

Complete pending todos in testing (#2088)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent d32a738 commit 5a9b13e

File tree: 4 files changed, +58 additions, -67 deletions

litgpt/config.py

Lines changed: 50 additions & 48 deletions
@@ -1536,7 +1536,9 @@ def norm_class(self) -> Type:
         name="CodeLlama-70b-Instruct-hf",
         hf_config=dict(org="codellama", name="CodeLlama-70b-Instruct-hf"),
         block_size=16384,
-        vocab_size=32016,
+        # 32016 is an added token, so not reported in vocab_size
+        # https://huggingface.co/codellama/CodeLlama-70b-Instruct-hf/blob/main/tokenizer_config.json
+        vocab_size=32015,
         padding_multiple=16,
         n_layer=80,
         n_head=64,
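
Note on the vocab_size change: with padding_multiple=16, litgpt is expected to round the vocabulary up to the next multiple of 16 when no padded_vocab_size is given, so a vocab_size of 32015 still yields an embedding table of 32016 entries, leaving room for the one added token mentioned in the comment. A minimal sketch of that rounding rule (illustrative, not necessarily litgpt's exact code):

def find_multiple(n: int, k: int) -> int:
    # round n up to the nearest multiple of k
    return n if n % k == 0 else n + k - (n % k)

vocab_size, padding_multiple = 32015, 16
padded_vocab_size = find_multiple(vocab_size, padding_multiple)
assert padded_vocab_size == 32016  # covers the single added token noted above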
@@ -2331,53 +2333,6 @@ def norm_class(self) -> Type:
     ),
 ]
 
-qwen_2_5_1m = [
-    # https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-1M/blob/main/config.json
-    dict(
-        name="Qwen2.5-7B-Instruct-1M",
-        hf_config=dict(org="Qwen", name="Qwen2.5-7B-Instruct-1M"),
-        block_size=1010000,
-        vocab_size=151643,
-        padded_vocab_size=152064,
-        n_layer=28,
-        n_head=28,
-        n_embd=3584,
-        n_query_groups=4,
-        rotary_percentage=1.0,
-        parallel_residual=False,
-        bias=False,
-        attn_bias=True,
-        norm_class_name="RMSNorm",
-        mlp_class_name="LLaMAMLP",
-        intermediate_size=18944,
-        norm_eps=1e-5,
-        rope_base=10000000,
-    ),
-    # https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M/blob/main/config.json
-    dict(
-        name="Qwen2.5-14B-Instruct-1M",
-        hf_config=dict(org="Qwen", name="Qwen2.5-14B-Instruct-1M"),
-        block_size=1010000,
-        vocab_size=151643,
-        padded_vocab_size=152064,
-        n_layer=48,
-        n_head=40,
-        n_embd=5120,
-        n_query_groups=8,
-        rotary_percentage=1.0,
-        parallel_residual=False,
-        bias=False,
-        attn_bias=True,
-        norm_class_name="RMSNorm",
-        mlp_class_name="LLaMAMLP",
-        intermediate_size=13824,
-        norm_eps=1e-5,
-        rope_base=10000000,
-    ),
-]
-
-qwen_2_5.extend(qwen_2_5_1m)
-
 qwen_2_5_coder = [
     # https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B/blob/main/config.json
     dict(
@@ -2584,6 +2539,53 @@ def norm_class(self) -> Type:
         copy["hf_config"]["name"] = c["hf_config"]["name"].format(kind)
         configs.append(copy)
 
+qwen_2_5_1m = [
+    # https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-1M/blob/main/config.json
+    dict(
+        name="Qwen2.5-7B-Instruct-1M",
+        hf_config=dict(org="Qwen", name="Qwen2.5-7B-Instruct-1M"),
+        block_size=1010000,
+        vocab_size=151643,
+        padded_vocab_size=152064,
+        n_layer=28,
+        n_head=28,
+        n_embd=3584,
+        n_query_groups=4,
+        rotary_percentage=1.0,
+        parallel_residual=False,
+        bias=False,
+        attn_bias=True,
+        norm_class_name="RMSNorm",
+        mlp_class_name="LLaMAMLP",
+        intermediate_size=18944,
+        norm_eps=1e-5,
+        rope_base=10000000,
+    ),
+    # https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M/blob/main/config.json
+    dict(
+        name="Qwen2.5-14B-Instruct-1M",
+        hf_config=dict(org="Qwen", name="Qwen2.5-14B-Instruct-1M"),
+        block_size=1010000,
+        vocab_size=151643,
+        padded_vocab_size=152064,
+        n_layer=48,
+        n_head=40,
+        n_embd=5120,
+        n_query_groups=8,
+        rotary_percentage=1.0,
+        parallel_residual=False,
+        bias=False,
+        attn_bias=True,
+        norm_class_name="RMSNorm",
+        mlp_class_name="LLaMAMLP",
+        intermediate_size=13824,
+        norm_eps=1e-5,
+        rope_base=10000000,
+    ),
+]
+
+configs.extend(qwen_2_5_1m)
+
 ##########
 # QwQ
 ##########
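
The 1M-context entries are now registered by extending `configs` directly after the name-templating loop, instead of extending `qwen_2_5` before it. A minimal sketch of the pattern, assuming the loop mirrors the context lines above (names and fields trimmed for brevity):

from copy import deepcopy

configs = []

# templated family: names carry a "{}" placeholder filled in per kind
qwen_2_5 = [dict(name="Qwen2.5-7B{}", hf_config=dict(org="Qwen", name="Qwen2.5-7B{}"))]
for c in qwen_2_5:
    for kind in ("", "-Instruct"):
        copy = deepcopy(c)
        copy["name"] = c["name"].format(kind)
        copy["hf_config"]["name"] = c["hf_config"]["name"].format(kind)
        configs.append(copy)

# the 1M variants already carry their final names (no placeholder), so they are
# appended to `configs` after the loop; extending `qwen_2_5` beforehand would push
# them through the templating pass and register each of them once per `kind`.
qwen_2_5_1m = [dict(name="Qwen2.5-7B-Instruct-1M", hf_config=dict(org="Qwen", name="Qwen2.5-7B-Instruct-1M"))]
configs.extend(qwen_2_5_1m)

print([c["name"] for c in configs])  # ['Qwen2.5-7B', 'Qwen2.5-7B-Instruct', 'Qwen2.5-7B-Instruct-1M']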

tests/test_config_hub.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
     ("litgpt/pretrain.py", "pretrain/tinystories.yaml"),
     (
         "litgpt/pretrain.py",
-        "https://raw.githubusercontent.com/Lightning-AI/litgpt/4d55ab6d0aa404f0da0d03a80a8801ed60e07e83/config_hub/pretrain/tinystories.yaml", # TODO: Update with path from main after merge
+        "https://raw.githubusercontent.com/Lightning-AI/litgpt/main/config_hub/pretrain/tinystories.yaml",
     ),
 ]
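
The pinned-commit URL is replaced with the main-branch copy of tinystories.yaml. For context, an illustrative helper (not the actual test fixture) showing how a parametrized entry can point at either a local path or a raw GitHub URL; PyYAML and network access are assumed:

from pathlib import Path
from urllib.request import urlopen

import yaml  # PyYAML, assumed available in the test environment

def load_config(source: str) -> dict:
    """Load a YAML config from a local path or an http(s) URL."""
    if source.startswith(("http://", "https://")):
        with urlopen(source) as resp:
            return yaml.safe_load(resp.read())
    return yaml.safe_load(Path(source).read_text())

cfg = load_config("https://raw.githubusercontent.com/Lightning-AI/litgpt/main/config_hub/pretrain/tinystories.yaml")
print(sorted(cfg)[:5])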

tests/test_rope.py

Lines changed: 0 additions & 3 deletions
@@ -1,6 +1,5 @@
 # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
 
-import pytest
 import torch
 from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXConfig, GPTNeoXRotaryEmbedding
 from transformers.models.gpt_neox.modeling_gpt_neox import apply_rotary_pos_emb as apply_rotary_pos_emb_gptneo
@@ -231,8 +230,6 @@ def test_rope_llama_3_2():
 
 
 # See https://huggingface.co/google/gemma-3-27b-it/blob/main/config.json for settings
-# TODO: update HF transformers version to support Gemma3 and fix errors that causes after the update
-@pytest.mark.skip(reason="This test fails due to the HF transformers version not supporting Gemma3")
 @torch.inference_mode()
 def test_rope_gemma_3():
     from transformers.models.gemma3.configuration_gemma3 import Gemma3TextConfig
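
With the transformers pin now supporting Gemma3, test_rope_gemma_3 runs again and compares litgpt's rotary embeddings against the Hugging Face reference. The following is not the test body, just a self-contained torch illustration of the rotate-half RoPE convention such comparisons rely on, plus one property (norm preservation) that is easy to check:

import torch

def rope_cache(seq_len: int, head_dim: int, base: float = 10_000.0):
    # standard RoPE frequencies; cos/sin tables of shape (seq_len, head_dim)
    inv_freq = 1.0 / (base ** (torch.arange(0, head_dim, 2).float() / head_dim))
    t = torch.arange(seq_len).float()
    freqs = torch.outer(t, inv_freq)
    emb = torch.cat((freqs, freqs), dim=-1)
    return emb.cos(), emb.sin()

def rotate_half(x: torch.Tensor) -> torch.Tensor:
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) -> torch.Tensor:
    return x * cos + rotate_half(x) * sin

q = torch.randn(1, 8, 16, 64)            # (batch, heads, seq, head_dim)
cos, sin = rope_cache(seq_len=16, head_dim=64)
q_rot = apply_rope(q, cos, sin)
# RoPE is a per-position rotation, so it preserves each head vector's norm
assert torch.allclose(q.norm(dim=-1), q_rot.norm(dim=-1), atol=1e-5)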

tests/test_tokenizer.py

Lines changed: 7 additions & 15 deletions
@@ -33,16 +33,18 @@ def test_tokenizer_against_hf(config, tmp_path):
             warnings.warn(str(ex), RuntimeWarning)
     if "tokenizer.json" not in hf_files and "tokenizer.model" not in hf_files:
         raise ConnectionError("Unable to download any tokenizer files from HF")
+
+    # we need to rename the dir to match the model name in testing as well,
+    # since we use it to determine the model in tokenizer.py
+    tmp_path = tmp_path.rename(tmp_path.parent / config.hf_config["name"])
+
     for filename, hf_file in hf_files.items():
         shutil.copy(hf_file, str(tmp_path / filename))
 
     ours = Tokenizer(tmp_path)
 
     assert ours.vocab_size == theirs.vocab_size
-    if config.name.startswith("CodeLlama-70b-Instruct"):
-        # TODO: the HF tokenizer returns 1 less token for this model. why?
-        assert ours.vocab_size == config.vocab_size - 1
-    elif config.name == "Mixtral-8x22B-v0.1":
+    if config.name == "Mixtral-8x22B-v0.1":
         pytest.xfail(reason="Mixtral certainly lists 32000 vocab in its config")
     else:
         assert ours.vocab_size == config.vocab_size
@@ -70,17 +72,7 @@ def test_tokenizer_against_hf(config, tmp_path):
     prompt = PromptStyle.from_config(config).apply(prompt)
     actual = ours.encode(prompt)
     expected = theirs.encode(prompt)
-    if (expected[0] == theirs.bos_token_id and actual[0] != theirs.bos_token_id) or (
-        expected[0] == theirs.bos_token_id and expected[1] == theirs.bos_token_id
-    ):
-        # TODO: check what is going on with the bos_tokens
-        del expected[0]
-    if config.name.startswith("CodeLlama-70b"):
-        # TODO: there's a encoding difference with this model. why? note that the decoding is equal
-        # "Hello": 10994, "▁Hello": 15043
-        assert [15043 if t == 10994 else t for t in actual.tolist()] == expected
-    else:
-        assert actual.tolist() == expected
+    assert actual.tolist() == expected
     assert ours.decode(actual) == theirs.decode(expected, skip_special_tokens=True)
 
     if not config.name.startswith(("Mistral", "Mixtral")):
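
The new rename step gives the temporary checkpoint directory the HF model name, since (per the added comment) tokenizer.py keys some behavior off that directory name. A minimal standalone sketch of the same pathlib pattern; the name-based check at the end is hypothetical and only stands in for whatever tokenizer.py actually inspects:

from pathlib import Path
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    tmp_path = Path(tmp) / "pytest-0"
    tmp_path.mkdir()
    (tmp_path / "tokenizer.json").write_text("{}")

    # rename the directory so its name matches the HF model name
    model_dir = tmp_path.rename(tmp_path.parent / "CodeLlama-70b-Instruct-hf")

    # hypothetical consumer: behavior keyed off the checkpoint directory name
    uses_llama_rules = "CodeLlama" in model_dir.name
    print(model_dir.name, uses_llama_rules)  # CodeLlama-70b-Instruct-hf True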
