Skip to content

Commit 32a2235

Browse files
abukhoy, quic-rishinr, Ann Kuruvilla
authored
[CI-Time]: Adding dummy Models to Improve Tests Time (quic#814)
CI test update to reduce the time taken for each model --------- Signed-off-by: Abukhoyer Shaik <abukhoye@qti.qualcomm.com> Signed-off-by: Abukhoyer Shaik <abukhoye@qti.qualcomm.com> Signed-off-by: Rishin Raj <rishinr@qti.qualcomm.com> Signed-off-by: Ann Kuruvilla <akuruvil@qti.qualcomm.com> Co-authored-by: Rishin Raj <rishinr@qti.qualcomm.com> Co-authored-by: Ann Kuruvilla <akuruvil@qti.qualcomm.com>
1 parent a071142 commit 32a2235

File tree

12 files changed

+1033
-467
lines changed

12 files changed

+1033
-467
lines changed

QEfficient/utils/test_utils.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,68 @@
99
import torch.nn as nn
1010
import torchvision.transforms as T
1111
from torchvision.transforms.functional import InterpolationMode
12+
from transformers import (
13+
AutoModelForCausalLM,
14+
AutoModelForImageTextToText,
15+
)
16+
17+
18+
def load_vlm_model(config):
    """Load a pretrained vision-language model described by ``config``.

    The image-text-to-text auto class is tried first; if the architecture
    is not registered there (``ValueError``), we fall back to the causal-LM
    auto class with remote code enabled. The model is returned in eval mode.
    """
    path = config._name_or_path
    try:
        model = AutoModelForImageTextToText.from_pretrained(
            path, low_cpu_mem_usage=False, config=config
        )
    except ValueError:
        # Architecture not covered by the image-text auto class; load it
        # through the causal-LM path (custom remote code allowed).
        model = AutoModelForCausalLM.from_pretrained(
            path, low_cpu_mem_usage=False, trust_remote_code=True, config=config
        )
    model.eval()
    return model
34+
35+
36+
def load_vlm_model_from_config(config):
    """Build a randomly-initialized VLM from ``config`` (no pretrained weights).

    Tries the image-text-to-text auto class, falling back to the causal-LM
    auto class on ``ValueError``. Half-precision configs are promoted to
    float32. The model is returned in eval mode.
    """
    common = dict(attn_implementation="eager", trust_remote_code=True)
    try:
        model = AutoModelForImageTextToText.from_config(config, **common)
    except ValueError:
        model = AutoModelForCausalLM.from_config(config, **common)
    # Promote bf16/fp16 configs to fp32 — presumably for CPU-side testing.
    if getattr(model.config, "torch_dtype", None) in (torch.bfloat16, torch.float16):
        model = model.to(torch.float32)
    model.eval()
    return model
54+
55+
56+
def set_num_layers_vlm(config, n_layer=1):
    """Truncate a VLM config to ``n_layer`` layers (for faster CI tests).

    ``n_layer == -1`` means "use all the layers of the model": the config
    is returned untouched. Mutates ``config`` in place and returns it.
    """
    if n_layer == -1:
        return config

    if "mllama" in getattr(config, "model_type", ""):
        txt = config.text_config
        txt.num_hidden_layers = n_layer
        # Cross-attention indices past the truncated depth are no longer valid.
        txt.cross_attention_layers = [
            idx for idx in txt.cross_attention_layers if idx < n_layer
        ]
        return config

    if hasattr(config, "text_config"):
        config.text_config.num_hidden_layers = n_layer
        config.vision_config.num_hidden_layers = n_layer
        return config

    if hasattr(config, "llm_config"):
        config.llm_config.num_hidden_layers = n_layer
        config.vision_config.num_hidden_layers = n_layer
        return config

    # Plain (text-only style) config.
    config.num_hidden_layers = n_layer
    return config
1274

1375

1476
# Processor class for InternVL models
@@ -169,6 +231,36 @@ class ModelConfig:
169231
"TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
170232
}
171233

234+
# VLM checkpoints run through the standard (HF auto-class) test path.
STANDARD_VLM_MODELS = {
    "llava-hf/llava-1.5-7b-hf",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "google/gemma-3-4b-it",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
}

# InternVL-family checkpoints — presumably routed through the custom
# InternVL processor defined in this module; verify against callers.
INTERNVL_MODELS = {
    "OpenGVLab/InternVL2_5-1B",
    "OpenGVLab/InternVL3_5-1B",
}

# Molmo-family checkpoints.
MOLMO_MODELS = {
    "allenai/Molmo-7B-D-0924",
}

# Checkpoints excluded from the CI run. Reason not visible here —
# NOTE(review): likely size/runtime constraints; confirm against the tests.
SKIPPED_MODELS = {
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "allenai/Molmo-7B-D-0924",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
}

# Checkpoints compiled in a dual-QPC configuration — assumes separate
# vision/language program binaries; TODO confirm against compile path.
DUAL_QPC_MODELS = {
    "OpenGVLab/InternVL2_5-1B",
    "OpenGVLab/InternVL3_5-1B",
    "Qwen/Qwen2.5-VL-3B-Instruct",
}
263+
172264
EXTERNAL_MODELS = {
173265
"hpcai-tech/grok-1": {
174266
"pytorch_hf_tokens_custom_case": [

scripts/Jenkinsfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ pipeline {
9494
mkdir -p $PWD/Non_cli_qaic_multimodal &&
9595
export TOKENIZERS_PARALLELISM=false &&
9696
export QEFF_HOME=$PWD/Non_cli_qaic_multimodal &&
97-
pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log6.xml --durations=10 &&
97+
pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn) and (not finetune) and (not diffusion_models) and (not nightly)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log6.xml --durations=10 &&
98+
9899
junitparser merge tests/tests_log6.xml tests/tests_log.xml &&
99100
deactivate"
100101
'''
@@ -203,9 +204,9 @@ pipeline {
203204
cd /efficient-transformers &&
204205
. preflight_qeff/bin/activate &&
205206
# TODO: Update torch_qaic path to py312 when migrating to Python 3.12
206-
pip install /opt/qti-aic/integrations/torch_qaic/py312/torch_qaic-0.1.0-cp312-cp312-linux_x86_64.whl &&
207+
pip install /opt/qti-aic/integrations/torch_qaic/py312/torch_qaic-0.1.0-cp312-cp312-manylinux_2_34_x86_64.whl &&
207208
# pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
208-
pip install torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 --index-url https://download.pytorch.org/whl/cpu &&
209+
pip install torch==2.9.1 torchvision==0.24.1 torchaudio==2.9.1 --index-url https://download.pytorch.org/whl/cpu &&
209210
mkdir -p $PWD/cli_qaic_finetuning &&
210211
export TOKENIZERS_PARALLELISM=false &&
211212
export QEFF_HOME=$PWD/cli_qaic_finetuning &&

tests/configs/causal_model_configs.json

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,5 +487,55 @@
487487
}
488488
}
489489
}
490+
],
491+
"disaggregated_causal_lm_models": [
492+
{
493+
"model_name": "openai/gpt-oss-120b",
494+
"model_type": "gpt_oss",
495+
"additional_params": {
496+
"num_hidden_layers": 2,
497+
"hidden_size": 64,
498+
"intermediate_size": 256,
499+
"num_attention_heads": 2,
500+
"num_key_value_heads": 1,
501+
"num_local_experts": 4
502+
}
503+
}
504+
],
505+
"disaggregated_dummy_models": [
506+
{
507+
"model_name": "openai/gpt-oss-20b",
508+
"model_type": "gpt_oss",
509+
"tokenizer_id": "gpt2",
510+
"additional_params": {
511+
"num_hidden_layers": 2,
512+
"hidden_size": 64,
513+
"intermediate_size": 256,
514+
"num_attention_heads": 2,
515+
"num_key_value_heads": 1,
516+
"num_local_experts": 4,
517+
"head_dim": 32,
518+
"max_position_embeddings": 512,
519+
"vocab_size": 201088,
520+
"sliding_window": 128
521+
}
522+
},
523+
{
524+
"model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
525+
"model_type": "qwen3_moe",
526+
"additional_params": {
527+
"hidden_size": 256,
528+
"intermediate_size": 256,
529+
"max_position_embeddings": 512,
530+
"max_window_layers": 48,
531+
"moe_intermediate_size": 768,
532+
"num_attention_heads": 2,
533+
"num_experts": 4,
534+
"num_experts_per_tok": 2,
535+
"num_hidden_layers": 2,
536+
"num_key_value_heads": 1,
537+
"vocab_size": 151936
538+
}
539+
}
490540
]
491-
}
541+
}

0 commit comments

Comments
 (0)