Skip to content

Commit dde5562

Browse files
committed
Resolve rebase conflict
1 parent 3798202 commit dde5562

File tree

4 files changed

+18
-18
lines changed

4 files changed

+18
-18
lines changed

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4615,11 +4615,14 @@ def test_static_qwen3(self):
46154615
"--decoder_model",
46164616
"qwen3-0_6b",
46174617
"--model_mode",
4618-
"hybrid",
4619-
"--prefill_ar_len",
4620-
"32",
4618+
"kv",
46214619
"--max_seq_len",
4622-
"128",
4620+
"1024",
4621+
"--eval_perplexity",
4622+
"--tasks",
4623+
"wikitext",
4624+
"--limit",
4625+
"1",
46234626
"--r3",
46244627
"--enable_masked_softmax",
46254628
]
@@ -4634,8 +4637,6 @@ def test_static_qwen3(self):
46344637
if self.pre_gen_pte:
46354638
cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
46364639

4637-
# TODO: Change to PPL evaluation
4638-
golden_start_with = "<|im_start|>user"
46394640
p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
46404641
with Listener((self.ip, self.port)) as listener:
46414642
conn = listener.accept()
@@ -4644,12 +4645,13 @@ def test_static_qwen3(self):
46444645
if "Error" in msg:
46454646
self.fail(msg["Error"])
46464647
else:
4647-
model_out = msg["result"][0]
4648-
self.assertTrue(
4649-
model_out.startswith(golden_start_with),
4650-
f"Expected Output: {golden_start_with}. Actual Output: {model_out}",
4651-
)
4652-
self.assertGreaterEqual(msg["inference_speed"], 70) # Lanai
4648+
inference_speed_ref = {"SM8650": 38, "SM8750": 56}
4649+
self.assertLessEqual(msg["wiki_ppl"], 18)
4650+
self.assertLessEqual(msg["pte_size"], 950_000_000) # 950mb
4651+
if self.model in inference_speed_ref:
4652+
self.assertGreaterEqual(
4653+
msg["inference_speed"], inference_speed_ref[self.model]
4654+
)
46534655

46544656
def test_smollm2(self):
46554657
if not self.required_envs():

examples/qualcomm/oss_scripts/llama/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -s ${SERIAL
9696
#### SMOLLM2
9797
Default example using hybrid mode.
9898
```bash
99-
python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -H mlgtw-linux -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a8w --tokenizer_bin tokenizer.bin --decoder_model smollm2 --model_mode hybrid --prefill_ar_len 128 --max_seq_len 1024 --prompt "I would like to learn python, could you teach me with a simple example?"
99+
python examples/qualcomm/oss_scripts/llama/llama.py -b build-android -H mlgtw-linux -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a8w --decoder_model smollm2_135m --model_mode hybrid --prefill_ar_len 128 --max_seq_len 1024 --prompt "I would like to learn python, could you teach me with a simple example?"
100100
```
101101

102102
### KV Cache update mechanism

examples/qualcomm/oss_scripts/llama/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,21 @@
1919
from executorch.examples.models.smollm2 import (
2020
convert_weights as convert_smollm2_weights,
2121
)
22-
from executorch.examples.qualcomm.oss_scripts.llama.decoder_constants import (
23-
DECODER_MODEL_VERSION,
24-
)
2522

2623
BASE_DIR = os.path.dirname(__file__)
2724

2825

2926
@dataclass(init=False, frozen=True)
3027
class HFModel(ABC):
31-
""" Base class for all hugging face models
28+
"""Base class for all hugging face models
3229
3330
repo_id: Hugging Face Repo ID.
3431
params_path: Path to model's config.json. If the corresponding .json does not yet exist, please create one.
3532
convert_weights: Used to convert Hugging Face weights parameters to Static Decoder's parameter naming.
3633
transform_weight: Set to true to change HuggingFace weight to improve the performance of RoPE in HTP backend.
3734
instruct_model: True if the model uses chat templates; check the Hugging Face model card to confirm.
3835
"""
36+
3937
repo_id: str
4038
params_path: str
4139
convert_weights: Callable

examples/qualcomm/oss_scripts/llama/llama.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1248,7 +1248,7 @@ def export_llama(args) -> None:
12481248
chat_template = (
12491249
tokenizer.apply_chat_template
12501250
if hasattr(tokenizer, "apply_chat_template")
1251-
and SUPPORTED_HF_MODELS[args.decoder_model].transform_weight
1251+
and SUPPORTED_HF_MODELS[args.decoder_model].instruct_model
12521252
else None
12531253
)
12541254
runtime_tokenizer_path = tokenizer.save_pretrained(args.artifact)[-1]

0 commit comments

Comments
 (0)