Merged
3 changes: 2 additions & 1 deletion src/llmcompressor/modifiers/quantization/gptq/base.py
@@ -247,7 +247,8 @@ def on_initialize(self, state: State, **kwargs) -> bool:
             warnings.warn(
                 f"Failed to trace {model_name} with inputs {input_names}. For more "
                 "information on tracing with the sequential pipeline, see "
-                "`src/llmcompressor/transformers/tracing/GUIDE.md`"
+                "https://github.com/vllm-project/llm-compressor/blob/main/"
+                "src/llmcompressor/transformers/tracing/GUIDE.md"
             )
if isinstance(exception, unfixable_errors):
raise exception
6 changes: 4 additions & 2 deletions src/llmcompressor/modifiers/smoothquant/utils.py
@@ -1,5 +1,4 @@
 import functools
-import pathlib
 from collections import namedtuple
 from typing import Dict, List, Tuple, Union

@@ -94,7 +93,10 @@ def wrapper(*args, **kwargs):
         try:
             return func(*args, **kwargs)
         except Exception as original_exception:
-            readme_location = pathlib.Path(__file__).parent / "README.md"
+            readme_location = (
+                "https://github.com/vllm-project/llm-compressor/tree/main/"
+                "src/llmcompressor/modifiers/smoothquant"
+            )
raise RuntimeError(
f"Error resolving mappings for given architecture."
f"Please refer to the README at {readme_location} for more information."
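For context, the decorator this hunk modifies wraps mapping resolution and re-raises any failure with a pointer to the docs. A standalone sketch of that pattern, where `DOCS_URL` and `resolve_mappings` are illustrative names rather than the library's actual identifiers:

```python
import functools

# Illustrative constant; in the PR this string is assigned inline to readme_location
DOCS_URL = (
    "https://github.com/vllm-project/llm-compressor/tree/main/"
    "src/llmcompressor/modifiers/smoothquant"
)


def handle_mapping_resolution_errors(func):
    """Re-raise any mapping-resolution failure with a link to the README."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as original_exception:
            raise RuntimeError(
                "Error resolving mappings for the given architecture. "
                f"Please refer to the README at {DOCS_URL} for more information."
            ) from original_exception
    return wrapper


@handle_mapping_resolution_errors
def resolve_mappings():
    raise KeyError("unsupported architecture")  # simulated failure


try:
    resolve_mappings()
except RuntimeError as err:
    message = str(err)

assert DOCS_URL in message
```

Chaining with `from original_exception` keeps the underlying traceback visible while the user-facing message carries the docs link.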
8 changes: 5 additions & 3 deletions src/llmcompressor/utils/helpers.py
@@ -1091,9 +1091,11 @@ def DisableQuantization(model: torch.nn.Module):
     """
     Disable quantization from QuantizationModifier
     """
-    model.apply(disable_quantization)
-    yield
-    model.apply(enable_quantization)
+    try:
+        model.apply(disable_quantization)
+        yield
+    finally:
model.apply(enable_quantization) will always run regardless of whether the try block succeeds or fails. Is this intended?
Also, if it fails I would add a log.

Collaborator Author


model.apply(enable_quantization) will always run regardless of whether the try block succeeds or fails. Is this intended?

Yes, that is exactly the intention of this change

Also if it fails I would add a log

If an exception is raised, the user will see the raised exception.


Sorry, why do we want to run model.apply(enable_quantization) if it raises an exception? Wouldn't it just exit?

And as long as the exception contains the info we can get from this function, it should be OK, e.g. the input args that caused the error, which are useful to log.

Collaborator Author

@kylesayrs Jan 27, 2025


Sorry, why do we want to run model.apply(enable_quantization) if it raises an exception? Wouldn't it just exit?

For example, if the sequential pipeline fails, an exception is thrown and then caught in order to fall back to the layer-wise sequential pipeline. This is an example of where an exception is raised but caught, and I do not want to have to think about the weird interactions that would result from this context not being properly exited.

This "try/finally" pattern is, in general, good practice for context managers. It frees the user from worrying about whether the context was exited properly, reducing mental load.
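The restore-on-exception behavior under discussion can be demonstrated with a minimal sketch; the `disable_feature` helper and dict-based state below are hypothetical stand-ins for `model.apply(disable_quantization)` / `model.apply(enable_quantization)`:

```python
import contextlib


@contextlib.contextmanager
def disable_feature(state):
    """Toggle a flag off for the duration of the with-block."""
    state["enabled"] = False
    try:
        yield state
    finally:
        # Runs on normal exit AND when the body raises,
        # so the flag is always restored.
        state["enabled"] = True


state = {"enabled": True}
try:
    with disable_feature(state):
        assert state["enabled"] is False
        raise ValueError("simulated pipeline failure")
except ValueError:
    pass

# The exception propagated, but the finally clause still restored the flag.
assert state["enabled"] is True
```

With `contextlib.contextmanager`, an exception raised inside the `with` body is thrown into the generator at the `yield`, so without the `try`/`finally` the restore line would be skipped whenever the body raises.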

Collaborator


Could we move this code change to a separate PR? Ideally I would like the scope of this PR to be limited to updating README paths.

+        model.apply(enable_quantization)


@contextlib.contextmanager
Expand Down
5 changes: 4 additions & 1 deletion tests/llmcompressor/modifiers/smoothquant/test_utils.py
@@ -12,7 +12,10 @@

 @pytest.mark.unit
 def test_handle_mapping_resolution_errors():
-    README_LOCATION = "llmcompressor/modifiers/smoothquant/README.md"
+    README_LOCATION = (
+        "https://github.com/vllm-project/llm-compressor/tree/main/"
+        "src/llmcompressor/modifiers/smoothquant"
+    )

@handle_mapping_resolution_errors
def func_that_raises_exception():