
Commit 7272556

Code Review
1 parent 85ee963 commit 7272556

File tree: 3 files changed, 36 additions, 70 deletions
examples/models/qwen2_5/config/0_5b_config.json

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+{
+    "dim": 896,
+    "ffn_dim_multiplier": 1,
+    "hidden_dim": 4864,
+    "n_heads": 14,
+    "n_kv_heads": 2,
+    "n_layers": 24,
+    "norm_eps": 1e-06,
+    "rope_theta": 1000000.0,
+    "use_scaled_rope": false,
+    "vocab_size": 151936,
+    "use_hf_rope": true,
+    "attention_qkv_bias": true
+}
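
For context: these are the Qwen2.5-0.5B parameters that compile() now loads directly. A minimal sketch of how the file is consumed, assuming ModelArgs in static_llama.py accepts these keys as keyword arguments (as the llama.py hunk below suggests):

import json

# ModelArgs is the static-llama config dataclass imported by llama.py.
from executorch.examples.qualcomm.oss_scripts.llama.model.static_llama import ModelArgs

with open("examples/models/qwen2_5/config/0_5b_config.json") as f:
    kv_config = ModelArgs(**json.load(f))

# head_dim follows from the fields above: 896 / 14 = 64
print(kv_config.dim // kv_config.n_heads)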

examples/qualcomm/oss_scripts/llama/hf_converter/convert_config.py

Lines changed: 0 additions & 45 deletions
This file was deleted.
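
(For reference: the deleted module provided convert_configs, which remapped Hugging Face AutoConfig keys onto the ModelArgs field names. The compile() hunk below removes its only call site, since the Qwen2.5 parameters now ship as the static JSON config above.)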

examples/qualcomm/oss_scripts/llama/llama.py

Lines changed: 22 additions & 25 deletions
@@ -63,9 +63,6 @@
 from executorch.examples.models.llama.source_transformation.quantize import (
     get_quant_embedding_transform,
 )
-from executorch.examples.qualcomm.oss_scripts.llama.hf_converter.convert_config import (
-    convert_configs,
-)
 from executorch.examples.qualcomm.oss_scripts.llama.model.static_llama import (
     LlamaModel,
     ModelArgs,
@@ -514,20 +511,25 @@ def compile(args, pte_filename, tokenizer):
     start_ts = time.time()

     kv_config, prefill_config = None, None
+    params_path = ""
     if args.params:
-        with open(args.params) as f:
-            kv_config = ModelArgs(**json.load(f))
+        params_path = args.params
     else:
-        # For huggingface decoder model, we need to convert config to match the keys
-        model_id = HUGGING_FACE_REPO_IDS[args.decoder_model]
-        kv_config = AutoConfig.from_pretrained(model_id)
-        kv_config = convert_configs(kv_config)
-
-        if args.decoder_model == "qwen2_5":
-            kv_config.attention_qkv_bias = True
+        if args.decoder_model == "qwen2_5":
+            cur_dir = os.path.dirname(__file__)
+            params_path = os.path.join(
+                cur_dir,
+                "..",
+                "..",
+                "..",
+                "models",
+                "qwen2_5",
+                "config",
+                "0_5b_config.json",
+            )
+    with open(params_path) as f:
+        kv_config = ModelArgs(**json.load(f))

-    if not hasattr(kv_config, "head_dim"):
-        kv_config.head_dim = kv_config.dim // kv_config.n_heads
     # TODO: support batch inputs if necessary
     kv_config.max_batch_size = 1
     kv_config.max_seq_len = args.max_seq_len
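
A quick sanity check on the relative path built above; a sketch only, assuming llama.py stays at examples/qualcomm/oss_scripts/llama/llama.py:

import os

# Stand-in for os.path.dirname(__file__) inside llama.py.
cur_dir = "examples/qualcomm/oss_scripts/llama"

# Three ".." components climb from .../oss_scripts/llama back up to examples/.
params_path = os.path.join(
    cur_dir, "..", "..", "..", "models", "qwen2_5", "config", "0_5b_config.json"
)
print(os.path.normpath(params_path))
# -> examples/models/qwen2_5/config/0_5b_config.json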
@@ -1235,18 +1237,13 @@ def export_llama(args) -> None:
         with open(runtime_tokenizer_path, "r+") as file:
             data = json.load(file)
             # TODO: Encountered the following error during runtime, so switched behavior for now.
-            # Error: libc++abi: terminating due to uncaught exception of type std::runtime_error:
-            # Unsupported behavior 'Isolated' for Split PreTokenizer. Only 'MergedWithPrevious' is supported.
-            behavior = data["pre_tokenizer"]["pretokenizers"][0]["behavior"]
-            if behavior == "Isolated":
-                data["pre_tokenizer"]["pretokenizers"][0][
-                    "behavior"
-                ] = "MergedWithPrevious"
-            file.seek(0)
-            json.dump(data, file, indent=4)
-            file.truncate()
+            # Error: libc++abi: terminating due to uncaught exception of type std::runtime_error: Unsupported Normalizer type: NFC.
+            data.pop("normalizer")
+            file.seek(0)
+            json.dump(data, file, indent=4)
+            file.truncate()
     else:
-        raise RuntimeError(f"Unknown decoder_model: {args.llama_model}.")
+        raise RuntimeError(f"Unknown decoder_model: {args.decoder_model}.")

     if args.kv_updater == "smart_mask":
         args.shared_buffer = True
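
In isolation, the new tokenizer fix-up does the following; a sketch, where runtime_tokenizer_path is a hypothetical stand-in for the tokenizer.json that export_llama actually resolves, and the file is assumed to carry a top-level "normalizer" entry of the unsupported NFC type:

import json

runtime_tokenizer_path = "tokenizer.json"  # hypothetical path for illustration

with open(runtime_tokenizer_path, "r+") as file:
    data = json.load(file)
    # Drop the normalizer the runtime rejects ("Unsupported Normalizer type: NFC").
    data.pop("normalizer")
    file.seek(0)      # rewind so the rewrite starts at the top of the file
    json.dump(data, file, indent=4)
    file.truncate()   # discard trailing bytes if the new JSON is shorter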
