@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import re
+import shutil
 from typing import Optional
 from unittest.mock import MagicMock
 
@@ -34,6 +35,16 @@
 from transformers import AutoModelForCausalLM
 
 
+@pytest.fixture(scope="module", autouse=True)
+def cleanup_model_cache():
+    """Clean up the test model cache directory after all tests complete."""
+    yield
+    try:
+        shutil.rmtree("test-apply-model-cache", ignore_errors=True)
+    except Exception:
+        pass
+
+
 @pytest.fixture
 def mock_model():
     model = MagicMock()
@@ -55,6 +66,7 @@ def llama_stories_model():
     return AutoModelForCausalLM.from_pretrained(
         "Xenova/llama2.c-stories15M",
         torch_dtype="auto",
+        cache_dir="test-apply-model-cache",
     )
 
 
@@ -87,7 +99,8 @@ def test_target_prioritization(mock_frozen):
     }
 
     model = AutoModelForCausalLM.from_pretrained(
-        "HuggingFaceM4/tiny-random-LlamaForCausalLM", torch_dtype="auto"
+        "HuggingFaceM4/tiny-random-LlamaForCausalLM", torch_dtype="auto",
+        cache_dir="test-apply-model-cache"
     )
     model.eval()
 
@@ -185,6 +198,7 @@ def get_tinyllama_model():
     return AutoModelForCausalLM.from_pretrained(
         "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
        torch_dtype="auto",
+        cache_dir="test-apply-model-cache",
     )
 
 
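The new cleanup_model_cache fixture relies on pytest's yield-fixture teardown: scope="module" plus autouse=True makes it run once for this test module without any test requesting it, and the code after the yield executes only once every test in the module has finished. A minimal standalone sketch of the same pattern (the tmp-model-cache name here is illustrative, not part of this change):

import shutil

import pytest


@pytest.fixture(scope="module", autouse=True)
def cleanup_cache():
    # No setup needed; the module's tests run while the fixture is
    # suspended at the yield.
    yield
    # Teardown: remove the cache directory. ignore_errors=True swallows
    # a missing directory, so this is safe to call unconditionally.
    shutil.rmtree("tmp-model-cache", ignore_errors=True)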