Skip to content

Commit 32a2235

Browse files
abukhoy, quic-rishinr, Ann Kuruvilla
authored
[CI-Time]: Adding dummy Models to Improve Tests Time (quic#814)
CI test update to reduce the time taken for each model --------- Signed-off-by: Abukhoyer Shaik <abukhoye@qti.qualcomm.com> Signed-off-by: Abukhoyer Shaik <abukhoye@qti.qualcomm.com> Signed-off-by: Rishin Raj <rishinr@qti.qualcomm.com> Signed-off-by: Ann Kuruvilla <akuruvil@qti.qualcomm.com> Co-authored-by: Rishin Raj <rishinr@qti.qualcomm.com> Co-authored-by: Ann Kuruvilla <akuruvil@qti.qualcomm.com>
1 parent a071142 commit 32a2235

File tree

12 files changed

+1033
-467
lines changed

12 files changed

+1033
-467
lines changed

QEfficient/utils/test_utils.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,68 @@
99
import torch.nn as nn
1010
import torchvision.transforms as T
1111
from torchvision.transforms.functional import InterpolationMode
12+
from transformers import (
13+
AutoModelForCausalLM,
14+
AutoModelForImageTextToText,
15+
)
16+
17+
18+
def load_vlm_model(config):
    """Load a pretrained vision-language model described by ``config``.

    The image-text-to-text auto class is tried first; if the architecture
    is not registered there (``ValueError``), we fall back to the causal-LM
    auto class with remote code enabled. The model is returned in eval mode.
    """
    path = config._name_or_path
    try:
        model = AutoModelForImageTextToText.from_pretrained(
            path, low_cpu_mem_usage=False, config=config
        )
    except ValueError:
        # Architecture not covered by the image-text auto class; load it
        # through the causal-LM path (custom remote code allowed).
        model = AutoModelForCausalLM.from_pretrained(
            path, low_cpu_mem_usage=False, trust_remote_code=True, config=config
        )
    model.eval()
    return model
34+
35+
36+
def load_vlm_model_from_config(config):
    """Build a randomly-initialized VLM from ``config`` (no pretrained weights).

    Tries the image-text-to-text auto class, falling back to the causal-LM
    auto class on ``ValueError``. Half-precision configs are promoted to
    float32. The model is returned in eval mode.
    """
    common = dict(attn_implementation="eager", trust_remote_code=True)
    try:
        model = AutoModelForImageTextToText.from_config(config, **common)
    except ValueError:
        model = AutoModelForCausalLM.from_config(config, **common)
    # Promote bf16/fp16 configs to fp32 — presumably for CPU-side testing.
    if getattr(model.config, "torch_dtype", None) in (torch.bfloat16, torch.float16):
        model = model.to(torch.float32)
    model.eval()
    return model
54+
55+
56+
def set_num_layers_vlm(config, n_layer=1):
    """Truncate a VLM config to ``n_layer`` layers (for faster CI tests).

    ``n_layer == -1`` means "use all the layers of the model": the config
    is returned untouched. Mutates ``config`` in place and returns it.
    """
    if n_layer == -1:
        return config

    if "mllama" in getattr(config, "model_type", ""):
        txt = config.text_config
        txt.num_hidden_layers = n_layer
        # Cross-attention indices past the truncated depth are no longer valid.
        txt.cross_attention_layers = [
            idx for idx in txt.cross_attention_layers if idx < n_layer
        ]
        return config

    if hasattr(config, "text_config"):
        config.text_config.num_hidden_layers = n_layer
        config.vision_config.num_hidden_layers = n_layer
        return config

    if hasattr(config, "llm_config"):
        config.llm_config.num_hidden_layers = n_layer
        config.vision_config.num_hidden_layers = n_layer
        return config

    # Plain (text-only style) config.
    config.num_hidden_layers = n_layer
    return config
1274

1375

1476
# Processor class for InternVL models
@@ -169,6 +231,36 @@ class ModelConfig:
169231
"TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
170232
}
171233

234+
# VLM checkpoints run through the standard (HF auto-class) test path.
STANDARD_VLM_MODELS = {
    "llava-hf/llava-1.5-7b-hf",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "google/gemma-3-4b-it",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
}

# InternVL-family checkpoints — presumably routed through the custom
# InternVL processor defined in this module; verify against callers.
INTERNVL_MODELS = {
    "OpenGVLab/InternVL2_5-1B",
    "OpenGVLab/InternVL3_5-1B",
}

# Molmo-family checkpoints.
MOLMO_MODELS = {
    "allenai/Molmo-7B-D-0924",
}

# Checkpoints excluded from the CI run. Reason not visible here —
# NOTE(review): likely size/runtime constraints; confirm against the tests.
SKIPPED_MODELS = {
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "allenai/Molmo-7B-D-0924",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
}

# Checkpoints compiled in a dual-QPC configuration — assumes separate
# vision/language program binaries; TODO confirm against compile path.
DUAL_QPC_MODELS = {
    "OpenGVLab/InternVL2_5-1B",
    "OpenGVLab/InternVL3_5-1B",
    "Qwen/Qwen2.5-VL-3B-Instruct",
}
263+
172264
EXTERNAL_MODELS = {
173265
"hpcai-tech/grok-1": {
174266
"pytorch_hf_tokens_custom_case": [

scripts/Jenkinsfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ pipeline {
9494
mkdir -p $PWD/Non_cli_qaic_multimodal &&
9595
export TOKENIZERS_PARALLELISM=false &&
9696
export QEFF_HOME=$PWD/Non_cli_qaic_multimodal &&
97-
pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log6.xml --durations=10 &&
97+
pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn) and (not finetune) and (not diffusion_models) and (not nightly)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log6.xml --durations=10 &&
98+
9899
junitparser merge tests/tests_log6.xml tests/tests_log.xml &&
99100
deactivate"
100101
'''
@@ -203,9 +204,9 @@ pipeline {
203204
cd /efficient-transformers &&
204205
. preflight_qeff/bin/activate &&
205206
# TODO: Update torch_qaic path to py312 when migrating to Python 3.12
206-
pip install /opt/qti-aic/integrations/torch_qaic/py312/torch_qaic-0.1.0-cp312-cp312-linux_x86_64.whl &&
207+
pip install /opt/qti-aic/integrations/torch_qaic/py312/torch_qaic-0.1.0-cp312-cp312-manylinux_2_34_x86_64.whl &&
207208
# pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
208-
pip install torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 --index-url https://download.pytorch.org/whl/cpu &&
209+
pip install torch==2.9.1 torchvision==0.24.1 torchaudio==2.9.1 --index-url https://download.pytorch.org/whl/cpu &&
209210
mkdir -p $PWD/cli_qaic_finetuning &&
210211
export TOKENIZERS_PARALLELISM=false &&
211212
export QEFF_HOME=$PWD/cli_qaic_finetuning &&

tests/configs/causal_model_configs.json

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,5 +487,55 @@
487487
}
488488
}
489489
}
490+
],
491+
"disaggregated_causal_lm_models": [
492+
{
493+
"model_name": "openai/gpt-oss-120b",
494+
"model_type": "gpt_oss",
495+
"additional_params": {
496+
"num_hidden_layers": 2,
497+
"hidden_size": 64,
498+
"intermediate_size": 256,
499+
"num_attention_heads": 2,
500+
"num_key_value_heads": 1,
501+
"num_local_experts": 4
502+
}
503+
}
504+
],
505+
"disaggregated_dummy_models": [
506+
{
507+
"model_name": "openai/gpt-oss-20b",
508+
"model_type": "gpt_oss",
509+
"tokenizer_id": "gpt2",
510+
"additional_params": {
511+
"num_hidden_layers": 2,
512+
"hidden_size": 64,
513+
"intermediate_size": 256,
514+
"num_attention_heads": 2,
515+
"num_key_value_heads": 1,
516+
"num_local_experts": 4,
517+
"head_dim": 32,
518+
"max_position_embeddings": 512,
519+
"vocab_size": 201088,
520+
"sliding_window": 128
521+
}
522+
},
523+
{
524+
"model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
525+
"model_type": "qwen3_moe",
526+
"additional_params": {
527+
"hidden_size": 256,
528+
"intermediate_size": 256,
529+
"max_position_embeddings": 512,
530+
"max_window_layers": 48,
531+
"moe_intermediate_size": 768,
532+
"num_attention_heads": 2,
533+
"num_experts": 4,
534+
"num_experts_per_tok": 2,
535+
"num_hidden_layers": 2,
536+
"num_key_value_heads": 1,
537+
"vocab_size": 151936
538+
}
539+
}
490540
]
491-
}
541+
}

0 commit comments

Comments
 (0)