vllm-project
diff --git a/‎.github/workflows/test-check-transformers.yaml‎
Lines changed: 33 additions & 1 deletion b/‎.github/workflows/test-check-transformers.yaml‎
Lines changed: 33 additions & 1 deletion
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 3 deletions b/‎README.md‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎examples/big_models_with_accelerate/cpu_offloading_fp8.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/big_models_with_accelerate/cpu_offloading_fp8.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/big_models_with_accelerate/mult_gpus_int8_device_map.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/big_models_with_accelerate/mult_gpus_int8_device_map.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/big_models_with_accelerate/multi_gpu_int8.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/big_models_with_accelerate/multi_gpu_int8.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/multimodal_audio/whisper_example.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/multimodal_audio/whisper_example.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/multimodal_vision/idefics3_example.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/multimodal_vision/idefics3_example.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/multimodal_vision/llava_example.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/multimodal_vision/llava_example.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/multimodal_vision/mllama_example.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/multimodal_vision/mllama_example.py‎
Lines changed: 1 addition & 1 deletion
@@ -15,9 +15,50 @@ env:
   CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }}
 
 jobs:
+  # Gate job: exposes `changes-present`, which transformers-tests checks in its `if:`.
+  detect-changes:
+    runs-on: ubuntu-latest
+
+    outputs:
+      changes-present: ${{ steps.changed-files.outputs.any_modified }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # 0 = fetch full history rather than a shallow clone.
+          fetch-depth: 0
+      - name: Get changed files
+        id: changed-files
+        # NOTE(review): consider pinning this third-party action to a full commit SHA.
+        uses: tj-actions/changed-files@v45
+        with:
+          # Match everything, drop paths via `!` patterns, then list this workflow file again.
+          files: |
+            **
+            !examples/**
+            !tests/e2e/**
+            !tests/lmeval/**
+            !tests/examples/**
+            !**/*.md
+            !.github/**
+            .github/workflows/test-check-transformers.yaml
+
+      - name: Log relevant output
+        # Step outputs are passed through env instead of being expanded inside the script,
+        # so file names containing shell metacharacters are printed, never executed.
+        env:
+          CHANGES_PRESENT: ${{ steps.changed-files.outputs.any_modified }}
+          ALL_MODIFIED_FILES: ${{ steps.changed-files.outputs.all_modified_files }}
+        run: |
+          echo "changes-present: ${CHANGES_PRESENT}"
+          echo "all modified files: ${ALL_MODIFIED_FILES}"
+        shell: bash
+
   transformers-tests:
+    needs: [detect-changes]
     runs-on: gcp-k8s-vllm-l4-solo
-    if: contains(github.event.pull_request.labels.*.name, 'ready') || github.event_name == 'push'
+    if: (contains(github.event.pull_request.labels.*.name, 'ready') || github.event_name == 'push') && needs.detect-changes.outputs.changes-present == 'true'
     steps:
       - uses: actions/setup-python@v5
         with:
 
@@ -800,5 +800,6 @@ integrations/pytorch/pytorch_vision*
 nm_temp_test_logs/*
 sparse_logs/*
 wandb/
+timings/
 output_finetune/
 env_log.json
@@ -82,10 +82,9 @@ Note that the model can be swapped for a local or remote HF-compatible checkpoin
 Quantization is applied by selecting an algorithm and calling the `oneshot` API.
 
 ```python
-from llmcompressor.modifiers.quantization import GPTQModifier
 from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
-from llmcompressor.transformers import oneshot
-from transformers import AutoModelForCausalLM
+from llmcompressor.modifiers.quantization import GPTQModifier
+from llmcompressor import oneshot
 
 # Select quantization algorithm. In this case, we:
 #   * apply SmoothQuant to make the activations easier to quantize
 
@@ -1,7 +1,7 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier
-from llmcompressor.transformers import oneshot
 
 MODEL_ID = "meta-llama/Meta-Llama-3-70B-Instruct"
 OUTPUT_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic"
 
@@ -2,9 +2,9 @@
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
 from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
-from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.compression.helpers import calculate_offload_device_map
 
 MODEL_ID = "meta-llama/Meta-Llama-3-70B-Instruct"
 
@@ -1,8 +1,8 @@
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.transformers import oneshot
 
 MODEL_ID = "meta-llama/Meta-Llama-3-70B-Instruct"
 SAVE_DIR = MODEL_ID.split("/")[1] + "-W8A8-Dynamic"
 
@@ -2,8 +2,8 @@
 from datasets import load_dataset
 from transformers import WhisperProcessor
 
+from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableWhisperForConditionalGeneration
 
 # Select model and load it.
 
@@ -4,8 +4,8 @@
 from PIL import Image
 from transformers import AutoProcessor
 
+from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableIdefics3ForConditionalGeneration
 
 # Load model.
 
@@ -3,8 +3,8 @@
 from PIL import Image
 from transformers import AutoProcessor
 
+from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableLlavaForConditionalGeneration
 
 # Load model.
 
@@ -3,8 +3,8 @@
 from PIL import Image
 from transformers import AutoProcessor
 
+from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableMllamaForConditionalGeneration
 
 # Load model.