diff --git a/.github/workflows/test-check-transformers.yaml b/.github/workflows/test-check-transformers.yaml index e673a63bad..85bce9c8da 100644 --- a/.github/workflows/test-check-transformers.yaml +++ b/.github/workflows/test-check-transformers.yaml @@ -98,4 +98,4 @@ jobs: - name: Running KV Cache Tests if: (success() || failure()) && steps.install.outcome == 'success' run: | - pytest -v tests/llmcompressor/transformers/kv_cache + pytest -v tests/llmcompressor/transformers/kv_cache -k "not test_kv_cache_gptq_model_state_dict_attr" diff --git a/src/llmcompressor/args/dataset_arguments.py b/src/llmcompressor/args/dataset_arguments.py index 31f7c73bb3..9f9cb07c01 100644 --- a/src/llmcompressor/args/dataset_arguments.py +++ b/src/llmcompressor/args/dataset_arguments.py @@ -3,6 +3,8 @@ from transformers import DefaultDataCollator +from llmcompressor.pipelines.registry import PIPELINES + @dataclass class DVCDatasetArguments: @@ -171,3 +173,17 @@ class DatasetArguments(CustomDatasetArguments): "will execute code present on the Hub on your local machine." }, ) + pipeline: Optional[str] = field( + default="independent", + metadata={ + "help": "Calibration pipeline used to calibrate model. 
" + f"Options: {PIPELINES.keys()}" + }, + ) + tracing_ignore: List[str] = field( + default_factory=lambda: ["_update_causal_mask"], + metadata={ + "help": "List of functions to ignore during tracing, either " + "{module}.{method_name} or {function_name}" + }, + ) diff --git a/src/llmcompressor/core/events/event.py b/src/llmcompressor/core/events/event.py index 89eb780c85..c20fe4066e 100644 --- a/src/llmcompressor/core/events/event.py +++ b/src/llmcompressor/core/events/event.py @@ -44,6 +44,8 @@ class EventType(Enum): BATCH_START = "batch_start" LOSS_CALCULATED = "loss_calculated" BATCH_END = "batch_end" + SEQUENTIAL_EPOCH_END = "sequential_epoch_end" + CALIBRATION_EPOCH_END = "calibration_epoch_end" # step lifecycle OPTIM_PRE_STEP = "optim_pre_step" diff --git a/src/llmcompressor/core/lifecycle.py b/src/llmcompressor/core/lifecycle.py index ff91c70c8e..c1344da075 100644 --- a/src/llmcompressor/core/lifecycle.py +++ b/src/llmcompressor/core/lifecycle.py @@ -77,6 +77,15 @@ def reset(self): self.__init__() logger.info("Compression lifecycle reset") + def initialize_recipe( + self, + recipe: Optional[RecipeInput] = None, + recipe_stage: Optional[RecipeStageInput] = None, + recipe_args: Optional[RecipeArgsInput] = None, + ): + self.recipe_container.append(recipe, recipe_stage, recipe_args) + self.modifiers = self.recipe_container.get_modifiers() + def initialize( self, recipe: Optional[RecipeInput] = None, @@ -92,12 +101,10 @@ def initialize( :rtype: List[Any] """ self.state.update(**kwargs) - if self.initialized_: # TODO: do not initialize twice - return logger.debug("Initializing compression lifecycle") - self.recipe_container.append(recipe, recipe_stage, recipe_args) - self.modifiers = self.recipe_container.get_modifiers() + if not (recipe is recipe_stage is recipe_args is None): + self.initialize_recipe(recipe, recipe_stage, recipe_args) self._set_model_layer_prefix() mod_data = [] diff --git a/src/llmcompressor/core/session.py b/src/llmcompressor/core/session.py 
index 4f21c3f7ad..5651f85118 100644 --- a/src/llmcompressor/core/session.py +++ b/src/llmcompressor/core/session.py @@ -220,6 +220,14 @@ def get_serialized_recipe(self) -> Optional[str]: logger.warning("Recipe not found in session - it may have been reset") + def get_modifiers(self): + stage_modifiers = self.lifecycle.modifiers + return [ + modifier + for stage_modifier in stage_modifiers + for modifier in stage_modifier.modifiers + ] # noqa: E127 + def _log_model_info(self): # Log model level logs if cadence reached current_index = self._lifecycle.global_step diff --git a/src/llmcompressor/core/session_functions.py b/src/llmcompressor/core/session_functions.py index b280febbe7..522fbf8eb2 100644 --- a/src/llmcompressor/core/session_functions.py +++ b/src/llmcompressor/core/session_functions.py @@ -1,6 +1,6 @@ import threading from contextlib import contextmanager -from typing import Any, Optional +from typing import Any, Generator, Optional from llmcompressor.core.events import EventType from llmcompressor.core.session import CompressionSession @@ -21,7 +21,7 @@ @contextmanager -def create_session() -> CompressionSession: +def create_session() -> Generator[CompressionSession, None, None]: """ Context manager to create and yield a new session for sparsification. This will set the active session to the new session for the duration @@ -136,5 +136,26 @@ def batch_end(cls, **kwargs) -> ModifiedState: active_session()._log_model_info() return cls.event(EventType.BATCH_END, **kwargs) + @classmethod + def sequential_epoch_end(cls, **kwargs) -> ModifiedState: + """ + Invoke a sequential epoch end event for the active session. 
This event should be + called after one sequential layer has been calibrated/trained for one epoch + + This is called after a sequential layer has been calibrated with one batch, see + `src/llmcompressor/pipelines/sequential/pipeline.py` for usage example + """ + return cls.event(EventType.SEQUENTIAL_EPOCH_END, **kwargs) + + @classmethod + def calibration_epoch_end(cls, **kwargs) -> ModifiedState: + """ + Invoke a epoch end event for the active session during calibration. This event + should be called after the model has been calibrated for one epoch + + see `src/llmcompressor/pipelines/basic/pipeline.py` for usage example + """ + return cls.event(EventType.CALIBRATION_EPOCH_END, **kwargs) + callbacks = LifecycleCallbacks diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py index a6df0ee1c9..f770c12dc3 100644 --- a/src/llmcompressor/entrypoints/oneshot.py +++ b/src/llmcompressor/entrypoints/oneshot.py @@ -7,6 +7,7 @@ from llmcompressor.core.session_functions import active_session from llmcompressor.datasets import get_calibration_dataloader from llmcompressor.entrypoints.utils import post_process, pre_process +from llmcompressor.pipelines.registry import get_pipeline_fn __all__ = ["Oneshot", "oneshot"] @@ -157,21 +158,20 @@ def apply_recipe_modifiers( """ session = active_session() + session.reset() - session_kwargs = dict( - model=self.model, + session.lifecycle.state.update(model=self.model, start=-1) + session.lifecycle.initialize_recipe( recipe=self.recipe, - recipe_args=self.recipe_args.recipe_args, - calib_data=calibration_dataloader, - start=-1, # oneshot-specific argument - copy_data=False, - min_tokens_per_module=getattr(self, "min_tokens_per_module", None), recipe_stage=recipe_stage, + recipe_args=self.recipe_args.recipe_args, ) - session.reset() - session.initialize(**session_kwargs) - session.finalize(**session_kwargs) + modifiers = session.get_modifiers() + _, pipeline_fn = 
get_pipeline_fn(self.dataset_args.pipeline, modifiers) + pipeline_fn(self.model, calibration_dataloader, self.dataset_args) + + session.finalize() def oneshot(**kwargs) -> PreTrainedModel: diff --git a/src/llmcompressor/modifiers/modifier.py b/src/llmcompressor/modifiers/modifier.py index 38911b5901..e91d881e8e 100644 --- a/src/llmcompressor/modifiers/modifier.py +++ b/src/llmcompressor/modifiers/modifier.py @@ -89,7 +89,7 @@ def initialize(self, state: State, **kwargs): self.initialized_ = self.on_initialize(state=state, **kwargs) - # trigger start + # trigger starts fake_start_event = Event(type_=EventType.BATCH_START, global_step=0) if self.should_start(fake_start_event): self.on_start(state, fake_start_event, **kwargs) @@ -103,8 +103,8 @@ def finalize(self, state: State, **kwargs): :param state: The current state of the model :param kwargs: Additional arguments for finalizing the modifier """ - if self.finalized_ or not self.initialized_: - return + if self.finalized_: + raise RuntimeError("cannot finalize a modifier twice") if not self.initialized_: raise RuntimeError("cannot finalize an uninitialized modifier") diff --git a/src/llmcompressor/modifiers/obcq/base.py b/src/llmcompressor/modifiers/obcq/base.py index ddffdecdc5..54e79461b1 100644 --- a/src/llmcompressor/modifiers/obcq/base.py +++ b/src/llmcompressor/modifiers/obcq/base.py @@ -41,9 +41,6 @@ class SparseGPTModifier(SparsityModifierMixin, Modifier): Lifecycle: - on_initialize - register_hook(module, calibrate_module, "forward") - - run_sequential / run_layer_sequential / run_basic - - make_empty_hessian - - accumulate_hessian - on_sequential_batch_end - sparsify_weight - on_finalize @@ -90,6 +87,14 @@ def calibrate_module( args: Tuple[torch.Tensor, ...], _output: torch.Tensor, ): + """ + Calibration hook used to accumulate the hessian of the input to the module + + :param module: module being calibrated + :param args: inputs to the module, the first element of which is the + cannonical input + :param 
_output: uncompressed module output, unused + """ # Assume that the first argument is the input inp = args[0] @@ -108,10 +113,9 @@ def calibrate_module( self._num_samples[module], ) - def on_sequential_batch_end(self): + def compress_modules(self): """ - Sparsify modules - TODO: implement with event callback + Sparsify modules which have been calibrated """ for module in list(self._num_samples.keys()): name = self._module_names[module] @@ -152,7 +156,13 @@ def _maybe_onload_hessian(self, module: torch.nn.Module): self._hessians[module] = self._hessians[module].to(device="cpu") def on_finalize(self, state: State, **kwargs) -> bool: - self.remove_hooks() + # TODO: modify lifecycle to end on finalize + if not self.ended_: + self.on_end(state, None) # remove hooks + + if len(self._num_samples) > 0: + raise ValueError(f"Failed to compress {len(self._num_samples)} modules") + self._hessians = dict() self._num_samples = dict() self._module_names = dict() diff --git a/src/llmcompressor/modifiers/obcq/sgpt_mixin.py b/src/llmcompressor/modifiers/obcq/sgpt_mixin.py index 6cdf5fda80..b4c0bd51c7 100644 --- a/src/llmcompressor/modifiers/obcq/sgpt_mixin.py +++ b/src/llmcompressor/modifiers/obcq/sgpt_mixin.py @@ -9,13 +9,10 @@ from loguru import logger from pydantic import Field, PrivateAttr, field_validator, model_validator -from llmcompressor.core import State +from llmcompressor.core import Event, EventType, State +from llmcompressor.modifiers.modifier import Modifier from llmcompressor.modifiers.utils.hooks import HooksMixin from llmcompressor.pipelines.basic import run_pipeline as run_basic -from llmcompressor.pipelines.layer_sequential import ( - run_pipeline as run_layer_sequential, -) -from llmcompressor.pipelines.sequential import run_pipeline as run_sequential from llmcompressor.utils.pytorch.module import ( get_layers, get_no_split_params, @@ -24,7 +21,7 @@ ) -class SparsityModifierMixin(HooksMixin): +class SparsityModifierMixin(Modifier): # modifier arguments sparsity: 
Optional[Union[float, List[float]]] sparsity_profile: Optional[str] = None @@ -97,6 +94,10 @@ def calibrate_module( ): raise NotImplementedError() + @abstractmethod + def compress_modules(self): + raise NotImplementedError() + def on_initialize(self, state: "State", **kwargs) -> bool: """ Initialize and run the OBCQ algorithm on the current state @@ -160,48 +161,22 @@ def on_initialize(self, state: "State", **kwargs) -> bool: self._module_sparsities[module] = layer_sparsity self.register_hook(module, self.calibrate_module, "forward") - # infer and run pipeline - model_name = state.model.__class__.__name__ - input_names = dataloader.dataset.column_names - unfixable_errors = (torch.OutOfMemoryError, torch._C._LinAlgError) - try: - run_sequential( - state.model, - state.data.calib, - self.sequential_targets, - self.ignore, - self, - ) - return True - - except Exception as exception: - if isinstance(exception, torch.fx.proxy.TraceError): - warnings.warn(f"Failed to trace {model_name} with inputs {input_names}") - if isinstance(exception, unfixable_errors): - raise exception - - warnings.warn("Falling back to layer_sequential pipeline") - try: - run_layer_sequential( - state.model, - state.data.calib, - self.sequential_targets, - self, - ) - return True - - except Exception as exception: - if isinstance(exception, TypeError): - warnings.warn(f"{model_name} fails layer-wise assumptions") - if isinstance(exception, unfixable_errors): - raise exception - - warnings.warn( - "Falling back to basic pipeline, which requires extra memory and " - "may result in decreased accuracy" - ) - run_basic(state.model, state.data.calib, self) - return True + return True + + def on_event(self, state: State, event: Event, **kwargs): + if event.type_ == EventType.SEQUENTIAL_EPOCH_END: + self.compress_modules() + + if event.type_ == EventType.CALIBRATION_EPOCH_END: + self.compress_modules() + + # TODO: modify lifecycle to end on calibration epoch end + if not self.ended_: + self.on_end(state, 
None) + + def on_end(self, state: State, event: Event, **kwargs): + self.ended_ = True # TODO: move to super call + self.remove_hooks() def _infer_sequential_targets( self, model: torch.nn.Module diff --git a/src/llmcompressor/modifiers/pruning/wanda/base.py b/src/llmcompressor/modifiers/pruning/wanda/base.py index c77cfff81f..bf194268b8 100644 --- a/src/llmcompressor/modifiers/pruning/wanda/base.py +++ b/src/llmcompressor/modifiers/pruning/wanda/base.py @@ -74,6 +74,14 @@ def calibrate_module( args: Tuple[torch.Tensor, ...], _output: torch.Tensor, ): + """ + Calibration hook used to accumulate the row scalars of the input to the module + + :param module: module being calibrated + :param args: inputs to the module, the first element of which is the + cannonical input + :param _output: uncompressed module output, unused + """ # Assume that the first argument is the input inp = args[0] @@ -91,12 +99,10 @@ def calibrate_module( self._num_samples[module], ) - def on_sequential_batch_end(self): + def compress_modules(self): """ - Sparsify modules - TODO: implement with event callback + Sparsify modules which have been calibrated """ - for module in list(self._num_samples.keys()): name = self._module_names[module] sparsity = self._module_sparsities[module] @@ -120,7 +126,13 @@ def on_sequential_batch_end(self): del self._num_samples[module] def on_finalize(self, state: State, **kwargs) -> bool: - self.remove_hooks() + # TODO: modify lifecycle to end on finalize + if not self.ended_: + self.on_end(state, None) # remove hooks + + if len(self._num_samples) > 0: + raise ValueError(f"Failed to compress {len(self._num_samples)} modules") + self._row_scalars = dict() self._num_samples = dict() self._module_names = dict() diff --git a/src/llmcompressor/modifiers/quantization/calibration.py b/src/llmcompressor/modifiers/quantization/calibration.py index fa19948e86..97a946c6ef 100644 --- a/src/llmcompressor/modifiers/quantization/calibration.py +++ 
b/src/llmcompressor/modifiers/quantization/calibration.py @@ -3,8 +3,8 @@ import torch from compressed_tensors.quantization import ( KVCacheScaleType, + QuantizationScheme, QuantizationStatus, - is_attention_module, ) from compressed_tensors.quantization.lifecycle.forward import forward_quantize from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme @@ -14,6 +14,7 @@ from llmcompressor.modifiers.quantization.cache import QuantizedKVParameterCache from llmcompressor.observers import Observer +from llmcompressor.utils.helpers import getattr_chain __all__ = [ "initialize_observer", @@ -22,9 +23,10 @@ "calibrate_output_hook", "calibrate_kv_cache_input_hook", "calibrate_kv_cache_output_hook", - "set_unset_kv_cache", + "initialize_quantized_kv_cache", "freeze_module_quantization", "apply_calibration_status", + "reset_quantization_status", ] @@ -49,10 +51,6 @@ def initialize_observer( # no quantization scheme nothing to do return - # observers have a different lifecycle for kv_cache - if is_attention_module(module): - return - quantization_args = getattr(quantization_scheme, arg_name, None) # dont need observers for dynamic if quantization_args is not None and not quantization_args.dynamic: @@ -102,25 +100,15 @@ def update_weight_zp_scale(module: Module): :param quantize_weights_upfront: whether to automatically run weight quantization at the start of calibration """ - if not getattr(module, "quantization_scheme", None): - # no quantization scheme nothing to do + if getattr_chain(module, "quantization_scheme.weights", None) is None: return - status = getattr(module, "quantization_status", None) - if not status: - # not set to initialize; no scales/zp to update - return - if status != QuantizationStatus.INITIALIZED: + if getattr(module, "quantization_status", None) != QuantizationStatus.CALIBRATION: logger.warning( - f"Attempting set module with status {status} to calibration mode. 
" - f"but status is not {QuantizationStatus.INITIALIZED} - you may " - "be calibrating an uninitialized module which may fail or attempting " - "to re-calibrate a frozen module" + "Attempting to calibrate weights of a module not in calibration mode" ) - if module.quantization_scheme.weights is not None: - # set weight scale and zero_point up front, calibration data doesn't affect it - call_observer(module=module, base_name="weight") + call_observer(module=module, base_name="weight") def calibrate_activations(module: Module, value: torch.Tensor, base_name: str): @@ -200,21 +188,26 @@ def calibrate_kv_cache_output_hook(module: Module, _args: Any, _output: torch.Te update_parameter_data(module, v_scale, KVCacheScaleType.VALUE.value) -def set_unset_kv_cache(module: Module): +def initialize_quantized_kv_cache(module: Module): """ - Set or unset singleton QuantizedKVParameterCache for each - attn module when running kv_cache quantization. + Initialize a quantized kv_cache on a module (analogous to initializing an observer) + When a config specifying kv_cache quantization is applied to a model, the kv_cache + args are redefined as the output_activations targeting attention modules. 
+ + This function should be called on attention modules with output_activations """ - if not hasattr(module, "quantization_scheme"): + scheme: Optional[QuantizationScheme] = getattr(module, "quantization_scheme", None) + existing_kv_cache = getattr(module, "kv_cache", None) + + if ( + scheme is None + or not is_kv_cache_quant_scheme(scheme) + or isinstance(existing_kv_cache, QuantizedKVParameterCache) + ): return - if is_kv_cache_quant_scheme(module.quantization_scheme): - output_args = module.quantization_scheme.output_activations - kv_cache = QuantizedKVParameterCache(output_args) - if hasattr(module, "kv_cache"): - delattr(module, "kv_cache") - else: - setattr(module, "kv_cache", kv_cache) + quantized_kv_cache = QuantizedKVParameterCache(scheme.output_activations) + setattr(module, "kv_cache", quantized_kv_cache) def apply_calibration_status(module: Module): @@ -242,9 +235,21 @@ def freeze_module_quantization(module: Module): # nothing to do, already frozen return + # remove observers for name in ("input", "weight", "output"): obs_name = f"{name}_observer" if hasattr(module, obs_name): delattr(module, obs_name) + # remove quantized kv_cache + kv_cache = getattr(module, "kv_cache", None) + if isinstance(kv_cache, QuantizedKVParameterCache): + delattr(module, "kv_cache") + module.quantization_status = QuantizationStatus.FROZEN + + +def reset_quantization_status(model: Module): + for module in model.modules(): + if hasattr(module, "quantization_status"): + delattr(module, "quantization_status") diff --git a/src/llmcompressor/modifiers/quantization/gptq/base.py b/src/llmcompressor/modifiers/quantization/gptq/base.py index de428cce76..ec6cc02a3f 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/base.py +++ b/src/llmcompressor/modifiers/quantization/gptq/base.py @@ -1,9 +1,8 @@ import contextlib import warnings -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union import torch -from 
compressed_tensors.quantization import QuantizationScheme from compressed_tensors.utils import ( align_module_device, get_execution_device, @@ -11,30 +10,26 @@ update_offload_parameter, ) from loguru import logger -from pydantic import Field, PrivateAttr, field_validator +from pydantic import PrivateAttr, field_validator -from llmcompressor.core import State -from llmcompressor.modifiers import Modifier, ModifierFactory -from llmcompressor.modifiers.quantization.calibration import freeze_module_quantization +from llmcompressor.core import Event, EventType, State +from llmcompressor.modifiers import Modifier +from llmcompressor.modifiers.quantization.calibration import ( + apply_calibration_status, + freeze_module_quantization, +) from llmcompressor.modifiers.quantization.gptq.gptq_quantize import ( accumulate_hessian, make_empty_hessian, quantize_weight, ) -from llmcompressor.modifiers.quantization.quantization.base import QuantizationModifier -from llmcompressor.modifiers.utils.hooks import HooksMixin -from llmcompressor.pipelines.basic import run_pipeline as run_basic -from llmcompressor.pipelines.layer_sequential import ( - run_pipeline as run_layer_sequential, -) -from llmcompressor.pipelines.sequential import run_pipeline as run_sequential +from llmcompressor.modifiers.quantization.quantization import QuantizationMixin from llmcompressor.utils.metric_logging import CompressionLogger -from llmcompressor.utils.pytorch.module import get_no_split_params, qat_active __all__ = ["GPTQModifier"] -class GPTQModifier(Modifier, HooksMixin): +class GPTQModifier(Modifier, QuantizationMixin): """ Implements the GPTQ algorithm from https://arxiv.org/abs/2210.17323. 
This modifier uses activations to calibrate a hessian matrix, which is then used to determine @@ -79,32 +74,31 @@ class GPTQModifier(Modifier, HooksMixin): :param block_size: Used to determine number of columns to compress in one pass :param dampening_frac: Amount of dampening to apply to H, as a fraction of the diagonal norm - :param quantize: Set to True to quantize using an existing quantization modifier, - or pass in the configuration for a quantization modifier if one does not - already exist in the recipe :param offload_hessians: Set to True for decreased memory usage but increased runtime. - :param config_groups: [Used, if a quantization modifier is not specified], - dictionary specifying quantization schemes to apply to target + + :param config_groups: dictionary specifying quantization schemes to apply to target modules. Modules not matching a scheme target will NOT be quantized. - :param scheme: [Used, if a quantization modifier is not specified], the quantization - scheme to apply to the model, this is a dictionary that supports all keys from - QuantizationScheme except targets, which will be set to the targets parameter - set at the modifier level. Can also be set to a dictionary of the format - `preset_scheme_name: targets` for example: `W8A8: ['Linear']` for weight 8 bit - or a string of a preset scheme if targets is provided - and activation 8 bit quantization on the Linear layers. :param targets: list of layer names to quantize if a scheme is provided. Defaults to Linear layers - :param ignore: [Used, if a quantization modifier is not specified] - optional list of module class names or submodule names to not + :param ignore: optional list of module class names or submodule names to not quantize even if they match a target in config_groups. Defaults to empty list. - :param num_calibration_steps: Number of steps to run post training calibration for. 
- When None, the entire calibration_dataloader is used - :param disable_quantization_observer_epoch: [Used, if a quantization modifier is - not specified] Epoch to disable updates to the module - quantization observers. At this point, quantized weights and zero points will - not be updated. Leave None to not disable observers during QAT. Default is None + :param scheme: a single quantization scheme to apply to the model. This is a + dictionary that supports all keys from QuantizationScheme except targets, which + will be set to the targets parameter set at the modifier level. Can also be set + to a dictionary of the format `preset_scheme_name: targets` for example: + `W8A8: ['Linear']` for weight and activation 8-bit. + :param kv_cache_scheme: optional QuantizationArgs, that specify the + quantization of the kv cache. If None, kv cache is not quantized. + When applying kv cache quantization to transformer AutoModelForCausalLM, + the kv_cache_scheme gets converted into a QuantizationScheme that: + - targets the `q_proj` and `k_proj` modules of the model. The outputs + of those modules are the keys and values that might be cached + - quantizes the outputs of the aformentioned layers, so that + keys and values are compressed before storing them in the cache + There is an explicit assumption that the model contains modules with + `k_proj` and `v_proj` in their names. 
If this is not the case + and kv_cache_scheme != None, the quantization of kv cache will fail """ # gptq modifier arguments @@ -112,19 +106,9 @@ class GPTQModifier(Modifier, HooksMixin): sequential_targets: Union[str, List[str], None] = None block_size: int = 128 dampening_frac: Optional[float] = 0.01 - quantize: Union[bool, Dict] = True offload_hessians: bool = False - # arguments used for attached quant modifier - config_groups: Optional[Dict[str, QuantizationScheme]] = None - scheme: Optional[Union[str, Dict[str, Any]]] = None - targets: Union[str, List[str], None] = None - ignore: List[str] = Field(default_factory=list) - num_calibration_steps: Optional[int] = None - disable_quantization_observer_epoch: Optional[float] = None - # private variables - _quantization_modifier: Optional[QuantizationModifier] = PrivateAttr(default=None) _module_names: Dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict) _hessians: Dict[torch.nn.Module, torch.Tensor] = PrivateAttr(default_factory=dict) _num_samples: Dict[torch.nn.Module, int] = PrivateAttr(default_factory=dict) @@ -140,74 +124,22 @@ def validate_sequential_update(cls, value: bool) -> bool: return True - def _check_build_quant_modifier(self, model: torch.nn.Module): - """ - Check the model's quantization state matches that expected by this modifier, - adding a default quantization scheme if needed - - # TODO: build modifier during recipe validation - - :param state: session state storing input model and calibration data - """ - quantization_already_active = qat_active(model) - if isinstance(self.quantize, bool): - if not self.quantize and quantization_already_active: - logger.warning( - "GPTQ quantization is set to False, but a " - "quantization modifier is already active on the model " - "resetting quantize to True" - ) - self.quantize = True - elif self.quantize and not quantization_already_active: - logger.warning( - "GPTQ quantization is set to True without an " - "active quantization modifier." 
- ) - self._build_quant_modifier() - return # use existing quantization modifier if there is one - else: - if not isinstance(self.quantize, Dict): - raise ValueError( - "GPTQModifier.quantize accepts only a single " - "quantization modifier or a boolean. Found " - f"type {type(self.quantize)}" - ) - if len(self.quantize) != 1: - raise ValueError( - "GPTQModifier.quantize accepts only a single " - "quantization modifier or a boolean. Found " - f"{len(self.quantize)} modifiers" - ) - if quantization_already_active: - logger.warning( - "Attempting to initialize quantization for GPTQ " - "but a quantization modifier has already been applied. " - "The quantization configuration defined under the " - "GPTQ modifier will be ignored." - ) - self.quantize = True - return - self._build_quant_modifier_from_dict(self.quantize) - self.quantize = True - def on_initialize(self, state: State, **kwargs) -> bool: """ Initialize and run the GPTQ algorithm on the current state :param state: session state storing input model and calibration data """ - # build quantization modifier - self._check_build_quant_modifier(state.model) - - if self._quantization_modifier: - self._quantization_modifier.initialize(state, **kwargs) - if not self.quantize: - raise ValueError("To use the GPTQModifier, quantization must be enabled.") + # apply config to model and prepare calibration hooks + if QuantizationMixin.has_config(self): + QuantizationMixin.attach_scheme_and_observers(self, state.model) + QuantizationMixin.register_calibration_hooks(self, state.model) # prepare module names self._module_names = {m: name for name, m in state.model.named_modules()} # register hooks + added_hook = False for module in state.model.modules(): if getattr_chain(module, "quantization_scheme.weights", None) is not None: # HACK: previously, embeddings were not quantized because they were not @@ -215,65 +147,37 @@ def on_initialize(self, state: State, **kwargs) -> bool: # but in the FUTURE this should be ignored by the 
user if not isinstance(module, torch.nn.Embedding): self.register_hook(module, self.calibrate_module, "forward") + added_hook = True - # infer sequential targets - if self.sequential_targets is None: - self.sequential_targets = get_no_split_params(state.model) - if isinstance(self.sequential_targets, str): - self.sequential_targets = [self.sequential_targets] - - # infer pipeline - model_name = state.model.__class__.__name__ - input_names = state.data.calib.dataset.column_names - unfixable_errors = ( - torch.OutOfMemoryError, - torch._C._LinAlgError, - KeyboardInterrupt, - ) - try: - run_sequential( - state.model, - state.data.calib, - self.sequential_targets, - self.ignore, - self, + if not added_hook: + raise ValueError( + "GPTQModifier requires a quantization config be specified by this " + "modifier or a modifier preceding it" ) - return True - - except Exception as exception: - if isinstance(exception, torch.fx.proxy.TraceError): - warnings.warn( - f"Failed to trace {model_name} with inputs {input_names}. For more " - "information on tracing with the sequential pipeline, see " - "https://github.com/vllm-project/llm-compressor/blob/main/" - "src/llmcompressor/transformers/tracing/GUIDE.md" - ) - if isinstance(exception, unfixable_errors): - raise exception - - warnings.warn("Falling back to layer_sequential pipeline") - try: - run_layer_sequential( - state.model, - state.data.calib, - self.sequential_targets, - self, - ) - return True - - except Exception as exception: - if isinstance(exception, TypeError): - warnings.warn(f"{model_name} fails layer-wise assumptions") - if isinstance(exception, unfixable_errors): - raise exception - - warnings.warn( - "Falling back to basic pipeline, which requires extra memory and " - "may result in decreased accuracy. 
Consider using " - "`offload_hessians=True`" - ) - run_basic(state.model, state.data.calib, self) - return True + + # prepare for calibration + state.model.apply(apply_calibration_status) + + return True + + def on_event(self, state: State, event: Event, **kwargs): + if event.type_ == EventType.SEQUENTIAL_EPOCH_END: + self.compress_modules() + + if event.type_ == EventType.CALIBRATION_EPOCH_END: + self.compress_modules() + + # TODO: modify lifecycle to end on calibration epoch end + if not self.ended_: + self.on_end(state, None) + + def on_end(self, state: State, event: Event, **kwargs): + """ + Finish calibrating by removing observers and calibration hooks + """ + self.ended_ = True # TODO: move to super call + state.model.apply(freeze_module_quantization) # remove observers + self.remove_hooks() # remove hooks def on_finalize(self, state: State, **kwargs) -> bool: """ @@ -281,13 +185,15 @@ def on_finalize(self, state: State, **kwargs) -> bool: :param state: session state storing input model and calibration data """ - if self._quantization_modifier: - self._quantization_modifier.finalize(state, **kwargs) + # TODO: modify lifecycle to end on finalize + if not self.ended_: + self.on_end(state, None) + + if len(self._num_samples) > 0: + raise ValueError(f"Failed to compress {len(self._num_samples)} modules") - self.remove_hooks() self._hessians = dict() self._num_samples = dict() - state.model.apply(freeze_module_quantization) return True @@ -298,13 +204,12 @@ def calibrate_module( _output: torch.Tensor, ): """ - Quantize a module's weight according to the GPTQ algorithm + Calibration hook used to accumulate the hessian of the input to the module - :param name: name of module being quantized - :param module: module being quantized - :param args: input arguments for module forward pass - - :return: total loss from applying weight quantization to this module + :param module: module being calibrated + :param args: inputs to the module, the first element of which is the 
+ canonical input + :param _output: uncompressed module output, unused """ # Assume that first argument is the input inp = args[0] @@ -326,10 +231,9 @@ def calibrate_module( self._num_samples[module], ) - def on_sequential_batch_end(self): + def compress_modules(self): """ - Quantize modules. - TODO: implement with event callback + Quantize modules which have been calibrated """ for module in list(self._num_samples.keys()): name = self._module_names[module] @@ -371,41 +275,3 @@ def _maybe_onload_hessian(self, module: torch.nn.Module): if self.offload_hessians: if module in self._hessians: # may have been deleted in context self._hessians[module] = self._hessians[module].to(device="cpu") - - def _build_quant_modifier(self): - """ - Build a quantization modifier based on the specified config_groups, - ignore list, and num_calibration_steps. - - :postcondition: self._quantization_modifier is set to the built - quantization modifier - """ - - quantization_args_names = [ - "config_groups", - "targets", - "scheme", - "num_calibration_steps", - "ignore", - "disable_quantization_observer_epoch", - ] - - quant_args = { - key: getattr(self, key) - for key in quantization_args_names - if getattr(self, key, False) - } - - logger.info(f"Building quantization modifier with args: {quant_args}") - vllm_quant_config = {"QuantizationModifier": quant_args} - self._build_quant_modifier_from_dict(vllm_quant_config) - - def _build_quant_modifier_from_dict(self, quant_config): - modifier_type = list(quant_config.keys())[0] - modifier_args = quant_config[modifier_type] - self._quantization_modifier = ModifierFactory.create( - modifier_type, - allow_registered=True, - allow_experimental=True, - **modifier_args, - ) diff --git a/src/llmcompressor/modifiers/quantization/quantization/__init__.py b/src/llmcompressor/modifiers/quantization/quantization/__init__.py index 8bdc93d14c..f268f065fb 100644 --- a/src/llmcompressor/modifiers/quantization/quantization/__init__.py +++ 
b/src/llmcompressor/modifiers/quantization/quantization/__init__.py @@ -1,3 +1,4 @@ # flake8: noqa from .base import * +from .mixin import * diff --git a/src/llmcompressor/modifiers/quantization/quantization/base.py b/src/llmcompressor/modifiers/quantization/quantization/base.py index 3a8946aefe..5262d58db1 100644 --- a/src/llmcompressor/modifiers/quantization/quantization/base.py +++ b/src/llmcompressor/modifiers/quantization/quantization/base.py @@ -1,43 +1,19 @@ -from typing import Any, Dict, List, Optional, Union - -from compressed_tensors.quantization import ( - QuantizationArgs, - QuantizationConfig, - QuantizationScheme, - QuantizationStatus, - apply_quantization_config, - is_attention_module, - is_preset_scheme, - preset_name_to_scheme, -) -from loguru import logger -from pydantic import Field, field_validator -from torch.nn import Module +import tqdm +from compressed_tensors.quantization import disable_quantization, enable_quantization from llmcompressor.core import Event, EventType, State from llmcompressor.modifiers import Modifier from llmcompressor.modifiers.quantization.calibration import ( apply_calibration_status, - calibrate_input_hook, - calibrate_kv_cache_input_hook, - calibrate_kv_cache_output_hook, - calibrate_output_hook, freeze_module_quantization, - initialize_observer, - set_unset_kv_cache, update_weight_zp_scale, ) -from llmcompressor.modifiers.utils.pytorch_helpers import ( - is_moe_model, - run_calibration_forward, -) -from llmcompressor.observers.helpers import get_observer_token_count -from llmcompressor.utils.helpers import calibration_forward_context +from llmcompressor.modifiers.quantization.quantization.mixin import QuantizationMixin __all__ = ["QuantizationModifier"] -class QuantizationModifier(Modifier): +class QuantizationModifier(Modifier, QuantizationMixin): """ Enables post training quantization (PTQ) and quantization aware training (QAT) for a given module or its submodules. 
After calibration (PTQ) or the start epoch (QAT), @@ -46,6 +22,8 @@ class QuantizationModifier(Modifier): :param config_groups: dictionary specifying quantization schemes to apply to target modules. Modules not matching a scheme target will NOT be quantized. + :param targets: list of layer names to quantize if a scheme is provided. Defaults + to Linear layers :param ignore: optional list of module class names or submodule names to not quantize even if they match a target in config_groups. Defaults to empty list. :param scheme: a single quantization scheme to apply to the model. This is a @@ -64,313 +42,60 @@ class QuantizationModifier(Modifier): There is an explicit assumption that the model contains modules with `k_proj` and `v_proj` in their names. If this is not the case and kv_cache_scheme != None, the quantization of kv cache will fail - :param targets: list of layer names to quantize if a scheme is provided. Defaults - to Linear layers - :param disable_quantization_observer_epoch: Epoch to disable updates to the module - quantization observers. At this point, quantized weights and zero points will - not be updated. Leave None to not disable observers during QAT. Default is None - :param num_calibration_steps: Number of steps to run post training calibration for. 
- When None, the entire calibration_dataloader is used """ - config_groups: Optional[Dict[str, QuantizationScheme]] = None - ignore: List[str] = Field(default_factory=list) - targets: Union[str, List[str]] = Field(default_factory=lambda: ["Linear"]) - scheme: Optional[Union[str, Dict[str, Any]]] = None - kv_cache_scheme: Optional[QuantizationArgs] = None - disable_quantization_observer_epoch: Optional[float] = None - num_calibration_steps: Optional[int] = None - - calibration_dataloader_: Any = None - calibration_function_: Any = None - - @field_validator("targets", mode="before") - def validate_targets(cls, value: Union[str, List[str]]) -> List[str]: - if isinstance(value, str): - return [value] - - return value - def on_initialize(self, state: State, **kwargs) -> bool: - if self.end and self.end != -1: - raise ValueError( - "end_epoch is disabled for QuantizationModifier and can only be set to" - " -1 or None. Given {}".format(self.end) - ) - - self.calibration_dataloader_ = state.data.calib - module = state.model - - # initialize quantization in appropriate modules - config = self._apply_modifier_to_model(module) - module.apply(lambda module: initialize_observer(module, base_name="weight")) - - if self.calculate_start() == -1: # one-shot - self._check_calibration_data(config) - module.apply(update_weight_zp_scale) - module.apply(apply_calibration_status) - self._calibrate_if_possible(module) - self._check_token_distribution( - module, threshold=kwargs.get("min_tokens_per_module") - ) - module.apply(freeze_module_quantization) - - return True - - def on_start(self, state: State, event: Event, **kwargs): - module = state.model - module.apply(update_weight_zp_scale) - - def on_update(self, state: State, event: Event, **kwargs): - if event.type_ == EventType.BATCH_START: - if self.check_should_disable_observer(event): - module = state.model - module.apply(freeze_module_quantization) - - def on_end(self, state: State, event: Event, **kwargs): - module = state.model - 
module.apply(freeze_module_quantization) - - def create_init_config(self) -> QuantizationConfig: - if self.scheme is not None: - # takes precedence over config_groups - - if isinstance(self.scheme, str) and is_preset_scheme(self.scheme): - # attach targets to scheme - self.scheme = {self.scheme: self.targets} - - self.config_groups = {} - for idx, key in enumerate(self.scheme.keys()): - if is_preset_scheme(key): - scheme = preset_name_to_scheme(key, self.scheme[key]) - else: - scheme = QuantizationScheme.model_validate( - {"targets": self.scheme[key], **self.scheme} - ) - - group_name = f"group_{idx}" - self.config_groups[group_name] = scheme - - if self.config_groups is None or len(self.config_groups) == 0: - default_quant_scheme = QuantizationScheme(targets=self.targets) - self.config_groups = {"group_0": default_quant_scheme} - logger.info( - f"No config groups were provided, using default {self.config_groups}" - ) - - return QuantizationConfig( - config_groups=self.config_groups, - kv_cache_scheme=self.kv_cache_scheme, - quantization_status=QuantizationStatus.INITIALIZED, - ignore=self.ignore, - ) - - def calculate_disable_observer_epoch(self) -> float: """ - Get the epoch at which we want to disable to quantization observer - :return epoch to disable at, or -1 if it is not set - """ - return ( - self.disable_quantization_observer_epoch - if self.disable_quantization_observer_epoch is not None - else -1 - ) - - def check_should_disable_observer(self, event: Event) -> bool: - """ - Given the current index, determine if we should disable the observer - - :param event: Event to get index from - :return: True if observer should be disabled, False otherwise - """ - disable_epoch = self.calculate_disable_observer_epoch() - if disable_epoch == -1: - return False - if event.current_index >= disable_epoch: - return True - return False - - def _check_calibration_data(self, config: QuantizationConfig): - has_calibration_data = self.calibration_dataloader_ is not None - 
requires_calibration = config.requires_calibration_data() - if self.calculate_start() == -1: # one shot - if requires_calibration and not has_calibration_data: - raise ValueError( - "The provided quantization configuration requires calibration data " - "but none was provided. Calibration data is required for static " - "quantization of input or output activations." - ) - if not requires_calibration and has_calibration_data: - logger.info( - "Skipping QuantizationModifier calibration, it is not required for " - "the provided quantization config." - ) - self.calibration_dataloader_ = None - - def _apply_modifier_to_model(self, model: Module): - modifier_as_config = self.create_init_config() - # Add step to attach kv_cache to the model, if present within the config - apply_quantization_config(model, modifier_as_config) - model.apply(set_unset_kv_cache) - return modifier_as_config + Prepare to calibrate activations and weights - def _calibrate_if_possible(self, module: Module): - # TODO: @dsikka restructure such that all of calibration isn't happening - # on init - # flake8: noqa - """# noqa: E501 - Run calibration if running input/output activation quantization or kv_cache - quantization. - - Calibration Lifecycle for a single torch.nn.Module: - - initialize_observer(): - if input/output activation: - - observer = Observer.load_from_registry(...) 
- - module.register_module(f"{base_name}_observer", observer) - - register_calibration_hooks(): - if input activation and not dynamic quant (used to call observers before intput QDQ): - - pre_hook := calibrate_input_hook - if output activation and not dynamic quant (used to call observers before output QDQ): - - post_hook := calibrate_kv_cache_output_hook - if kv_cache quantization (used to set kv_cache to QuantizedKVParameterCache and update k_scale/v_scale) - - pre_hook := calibrate_kv_cache_input_hook - - post_hook := calibrate_kv_cache_output_hook - - self._calibrate(module) # run forward pass through model using calibration data - set_unset_kv_cache() # remove kv_cache objects attached to attention layers - # initially set in _apply_modifier_to_model - remove calibration hooks in self.calibration_hooks_ - remove observers + According to the quantization config, a quantization scheme is attached to each + targeted module. The module's forward call is also overwritten to perform + quantization to inputs, weights, and outputs. + Then, according to the module's quantization scheme, observers and calibration + hooks are added. These hooks are disabled until the modifier starts. """ - if self.num_calibration_steps == 0 and self.calibration_dataloader_: - logger.warning( - f"num_calibration_steps is {self.num_calibration_steps}." - f"Calibration data loader will not be used." - ) - elif self.num_calibration_steps and not self.calibration_dataloader_: + if not QuantizationMixin.has_config(self): raise ValueError( - f"num_calibration_steps is {self.num_calibration_steps}. " - "Calibration data loader is not set. Pass a " - "calibration_data_loader with initialize(...) method." 
+ "QuantizationModifier requires that quantization fields to be specified" ) - elif not self.calibration_dataloader_: - return + QuantizationMixin.attach_scheme_and_observers(self, state.model) + state.model.apply(disable_quantization) # disable quantization until start + + # FUTURE: modify oneshot lifecycle to trigger on_start for on initialize + if self.calculate_start() == -1: # one shot + self.on_start(state) - module.apply(lambda model: initialize_observer(model, base_name="input")) - module.apply(lambda model: initialize_observer(model, base_name="output")) - module.apply(self.register_calibration_hooks) - self._calibrate(module) - module.apply(set_unset_kv_cache) - self.remove_hooks() + return True - def register_calibration_hooks(self, module: Module): + def on_start(self, state: State): """ - Register hooks for input/output activation or kv_cache quantization. + Begin calibrating activations and weights. Calibrate weights only once on start """ - quantization_scheme = getattr(module, "quantization_scheme", None) - if not quantization_scheme: - return + QuantizationMixin.register_calibration_hooks(self, state.model) + state.model.apply(apply_calibration_status) + state.model.apply(enable_quantization) - is_attention_module_ = is_attention_module(module) - input_quant = quantization_scheme.input_activations - output_quant = quantization_scheme.output_activations + modules = list(state.model.modules()) + for module in tqdm.tqdm(modules, desc="Calibrating weights"): + update_weight_zp_scale(module) - calibrate_inputs = ( - input_quant and not is_attention_module_ and not input_quant.dynamic - ) + def on_event(self, state: State, event: Event, **kwargs): + if event.type_ == EventType.CALIBRATION_EPOCH_END: + # TODO: modify lifecycle to end on calibration epoch end + if not self.ended_: + self.on_end(state, None) - # Calibrate inputs if an input_quant is provided and not running dynamic quant - if calibrate_inputs: - self.register_hook(module, 
calibrate_input_hook, "forward_pre") - - if output_quant: - # hooks for attn modules if running kv_cache quant - if is_attention_module_: - self.register_hook( - module, - calibrate_kv_cache_input_hook, - "forward_pre", - with_kwargs=True, - ) - - self.register_hook(module, calibrate_kv_cache_output_hook, "forward") - - # hooks for output quant if not running dynamic quant - elif not output_quant.dynamic: - self.register_hook(module, calibrate_output_hook, "forward") - - def _calibrate(self, module: Module): - class_name = self.__class__.__name__.replace("PyTorch", "") - logger.info( - f"Running {class_name} calibration with " - f"{len(self.calibration_dataloader_)} samples..." - ) - - with calibration_forward_context(module): - run_calibration_forward( - module, - self.calibration_dataloader_, - self.num_calibration_steps, - self.calibration_function_, - ) - - def _check_token_distribution( - self, model: Module, threshold: Optional[float] = None - ): + def on_end(self, state: State, event: Event, **kwargs): """ - A helper function that warns when a module has seen - fewer than threshold % of all the tokens throughout - the calibration process. - Checks are only triggered if threshold is not None. - :param model: the model to validate - :param threshold: the minimum percentage of tokens - (out of all the tokens in a batch) a module should - receive during calibration + Finish calibrating by removing observers and calibration hooks """ - - if self.calibration_dataloader_ is None: - logger.debug("Skipping token distribution check. No calibration data.") - return - - if not is_moe_model(model): - logger.debug("Skipping token distribution check. Not a MoE model.") - return - - if threshold is None: - logger.warning( - "Mixture of Experts model detected, but threshold not set. " - "Defaulting token threshold to 1/num_experts." 
- ) - - if not hasattr(model.config, "num_local_experts"): - logger.warning( - "Mixture of Experts model detected but `num_local_experts` " - "not found in model config. Skipping distribution check." - ) - return - - threshold = 1 / model.config.num_local_experts - logger.debug(f"Setting token threshold to {threshold}.") - - all_tokens = self.calibration_dataloader_.dataset["input_ids"] - total_token_count = sum(len(sample) for sample in all_tokens) - counter = get_observer_token_count(model) - for module_name, token_count in counter.items(): - if token_count is None: - # the module has not been observed - # or its token_count is not being recorded - # by the observer (refer to the observer's - # implementation in the source code) - continue - if token_count / total_token_count < threshold: - logger.warning( - f"The module_name: {module_name} " - f"received less than {int(threshold * 100)}% " - "of calibration batch tokens " - f"({token_count}/{total_token_count} tokens). " - "This could harm the quantization quality." 
- ) + self.ended_ = True # TODO: move to super call + state.model.apply(freeze_module_quantization) # remove observers + self.remove_hooks() # remove hooks + + def on_finalize(self, state: State, **kwargs) -> bool: + # TODO: modify lifecycle to end on finalize + if not self.ended_: + self.on_end(state, None) diff --git a/src/llmcompressor/modifiers/quantization/quantization/mixin.py b/src/llmcompressor/modifiers/quantization/quantization/mixin.py new file mode 100644 index 0000000000..d5611541e8 --- /dev/null +++ b/src/llmcompressor/modifiers/quantization/quantization/mixin.py @@ -0,0 +1,225 @@ +from typing import Any, Dict, List, Optional, Union + +import torch +from compressed_tensors.quantization import ( + QuantizationArgs, + QuantizationConfig, + QuantizationScheme, + QuantizationStatus, + apply_quantization_config, + is_attention_module, + is_preset_scheme, + preset_name_to_scheme, +) +from pydantic import Field, field_validator + +from llmcompressor.modifiers.quantization.calibration import ( + calibrate_input_hook, + calibrate_kv_cache_input_hook, + calibrate_kv_cache_output_hook, + calibrate_output_hook, + initialize_observer, + initialize_quantized_kv_cache, + reset_quantization_status, +) +from llmcompressor.modifiers.utils.hooks import HooksMixin + +__all__ = ["QuantizationMixin"] + + +class QuantizationMixin(HooksMixin): + """ + Mixin which enables a Modifier to act as a quantization config, attaching observers, + calibration hooks, and compression wrappers to modifiers + + Lifecycle: + - QuantizationMixin.attach_scheme_and_observers(model) + - Wraps model forward and attaches quantization scheme and observers + - QuantizationMixin.register_calibration_hooks(model) + - Registers calibration hooks which utilize observers to calibrate qparams + - model.apply(apply_calibration_status) + - [ Calibrate model ] + - model.apply(freeze_module_quantization) + - Remove observers + - self.remove_hooks() + - Remove calibration hooks + + Scheme is left attached to 
modules after PTQ finishes + + :param config_groups: dictionary specifying quantization schemes to apply to target + modules. Modules not matching a scheme target will NOT be quantized. + :param targets: list of layer names to quantize if a scheme is provided. Defaults + to Linear layers + :param ignore: optional list of module class names or submodule names to not + quantize even if they match a target in config_groups. Defaults to empty list. + :param scheme: a single quantization scheme to apply to the model. This is a + dictionary that supports all keys from QuantizationScheme except targets, which + will be set to the targets parameter set at the modifier level. Can also be set + to a dictionary of the format `preset_scheme_name: targets` for example: + `W8A8: ['Linear']` for weight and activation 8-bit. + :param kv_cache_scheme: optional QuantizationArgs, that specify the + quantization of the kv cache. If None, kv cache is not quantized. + When applying kv cache quantization to transformer AutoModelForCausalLM, + the kv_cache_scheme gets converted into a QuantizationScheme that: + - targets the `q_proj` and `k_proj` modules of the model. The outputs + of those modules are the keys and values that might be cached + - quantizes the outputs of the aforementioned layers, so that + keys and values are compressed before storing them in the cache + There is an explicit assumption that the model contains modules with + `k_proj` and `v_proj` in their names. 
If this is not the case + and kv_cache_scheme != None, the quantization of kv cache will fail + """ + + config_groups: Optional[Dict[str, QuantizationScheme]] = None + targets: Union[str, List[str]] = Field(default_factory=lambda: ["Linear"]) + ignore: List[str] = Field(default_factory=list) + scheme: Optional[Union[str, Dict[str, Any]]] = None + kv_cache_scheme: Optional[QuantizationArgs] = None + + @field_validator("targets", mode="before") + def validate_targets(cls, value: Union[str, List[str]]) -> List[str]: + if isinstance(value, str): + return [value] + + return value + + @field_validator("scheme", mode="before") + def validate_scheme( + cls, value: Optional[Union[str, Dict[str, Any]]] + ) -> Optional[Union[str, Dict[str, Any]]]: + if isinstance(value, str) and not is_preset_scheme(value): + raise ValueError( + "`scheme` must either be a preset scheme name or a dictionary " + "of preset scheme names" + ) + + if isinstance(value, dict): + for scheme_name in value.keys(): + cls.validate_scheme(scheme_name) + + for key, target in value.items(): + value[key] = cls.validate_targets(target) + + return value + + def attach_scheme_and_observers(self, model: torch.nn.Module): + """ + Apply this modifier as a quantization config to the model. 
Attach observers + according to the schemes attached to each module + """ + reset_quantization_status(model) # reset any previously applied qconfigs + + config = self.resolve_quantization_config() + apply_quantization_config(model, config) + + model.apply(self._initialize_observers) + + def register_calibration_hooks(self, model: torch.nn.Module): + """ + Register activation calibration hooks (including kv_cache quantization) + """ + model.apply(self._initialize_hooks) + + def has_config(self) -> bool: + return not ( + self.config_groups is None + and self.targets == ["Linear"] + and self.ignore == [] + and self.scheme is None + and self.kv_cache_scheme is None + ) + + def resolve_quantization_config(self) -> QuantizationConfig: + """ + Returns the quantization config specified by this modifier + """ + scheme = self.scheme + targets = self.targets + config_groups = self.config_groups + kv_cache_scheme = self.kv_cache_scheme + ignore = self.ignore + + if scheme is not None and config_groups is not None: + raise ValueError("Please specify either `scheme` or `config_groups`") + + if scheme is not None: + # takes precedence over config_groups + + if isinstance(scheme, str) and is_preset_scheme(scheme): + # attach targets to scheme + scheme = {scheme: targets} + + config_groups = {} + for idx, key in enumerate(scheme.keys()): + if is_preset_scheme(key): + scheme = preset_name_to_scheme(key, scheme[key]) + else: + scheme = QuantizationScheme.model_validate( + {"targets": scheme[key], **scheme} + ) + + group_name = f"group_{idx}" + config_groups[group_name] = scheme + + if config_groups is None or len(config_groups) == 0: + default_quant_scheme = QuantizationScheme(targets=targets) + config_groups = {"group_0": default_quant_scheme} + + return QuantizationConfig( + config_groups=config_groups, + kv_cache_scheme=kv_cache_scheme, + quantization_status=QuantizationStatus.INITIALIZED, + ignore=ignore, + ) + + def _initialize_observers(self, module: torch.nn.Module): + if not 
hasattr(module, "quantization_scheme"): + return + + scheme: QuantizationScheme = module.quantization_scheme + input = scheme.input_activations and not scheme.input_activations.dynamic + weight = scheme.weights is not None + output = scheme.output_activations and not scheme.output_activations.dynamic + is_attention = is_attention_module(module) + + # input activations + if input: + initialize_observer(module, base_name="input") + + # weight observers (used by `update_weight_zp_scale` or child modifier) + if weight: + initialize_observer(module, base_name="weight") + + # kv_cache activations. Within `apply_quantization_config`, the config is + # modified to use attention output quantization if a kv_cache_scheme exists + if is_attention and output: + initialize_quantized_kv_cache(module) + + # output activations + elif output: + initialize_observer(module, base_name="output") + + def _initialize_hooks(self, module: torch.nn.Module): + if not hasattr(module, "quantization_scheme"): + return + + scheme: QuantizationScheme = module.quantization_scheme + input = scheme.input_activations and not scheme.input_activations.dynamic + output = scheme.output_activations and not scheme.output_activations.dynamic + is_attention = is_attention_module(module) + + # input activations + if input: + self.register_hook(module, calibrate_input_hook, "forward_pre") + + # kv_cache activations. 
Within `apply_quantization_config`, the config is + # modified to use attention output quantization if a kv_cache_scheme exists + if is_attention and output: + self.register_hook( + module, calibrate_kv_cache_input_hook, "forward_pre", with_kwargs=True + ) + self.register_hook(module, calibrate_kv_cache_output_hook, "forward") + + # output activations + elif output: + self.register_hook(module, calibrate_output_hook, "forward") diff --git a/src/llmcompressor/modifiers/smoothquant/base.py b/src/llmcompressor/modifiers/smoothquant/base.py index a08d570eb1..09dae463c5 100644 --- a/src/llmcompressor/modifiers/smoothquant/base.py +++ b/src/llmcompressor/modifiers/smoothquant/base.py @@ -7,15 +7,13 @@ from pydantic import ConfigDict, Field from torch.nn import Module -from llmcompressor.core import State +from llmcompressor.core import Event, EventType, State from llmcompressor.modifiers import Modifier from llmcompressor.modifiers.smoothquant.utils import ( get_layer_mappings_from_architecture, handle_mapping_resolution_errors, ) -from llmcompressor.modifiers.utils.pytorch_helpers import run_calibration_forward from llmcompressor.utils.fsdp.helpers import get_fsdp_parent -from llmcompressor.utils.helpers import calibration_forward_context from llmcompressor.utils.pytorch.module import ( get_layers, get_matching_layer, @@ -134,21 +132,36 @@ def on_initialize(self, state: State, **kwargs) -> bool: self.resolved_mappings_ = self._resolve_mappings(state.model) self.scales_ = {} - calibration_dataloader = state.data.calib - self._setup_scale_hooks() - self._calibrate(state.model, calibration_dataloader) - self._apply_smoothing(state.model) return True + def on_event(self, state: State, event: Event, **kwargs): + if event.type_ == EventType.SEQUENTIAL_EPOCH_END: + self._apply_smoothing(state.model) + + if event.type_ == EventType.CALIBRATION_EPOCH_END: + self._apply_smoothing(state.model) + + # TODO: modify lifecycle to end on calibration epoch end + if not self.ended_: + 
self.on_end(state, None) + + def on_end(self, state: State, event: Event, **kwargs): + self.ended_ = True # TODO: move to super calls + self.remove_hooks() # remove hooks + def on_finalize(self, state: State, **kwargs) -> bool: """ Clean up by clearing the scale and mapping data - - :param state: unused - :return: True """ + # TODO: modify lifecycle to end on finalize + if not self.ended_: + self.on_end(state, None) + + if len(self.scales_) > 0: + raise ValueError(f"Failed to compress {len(self.scales_)} modules") + if self.scales_ is not None: self.scales_.clear() if self.resolved_mappings_ is not None: @@ -237,34 +250,6 @@ def hook_fn(module, inp, out): layer = mapping.smooth_layer self.register_hook(layer, create_hook_fn(name), "forward") - @torch.no_grad() - def _calibrate(self, model: Module, calibration_dataloader: List): - """ - Catch the output dynamic ranges of each layer that will be smoothed by running - forward passes with calibration_dataloader - """ - class_name = self.__class__.__name__.replace("PyTorch", "") - logger.info( - f"Running {class_name} calibration with " - f"{len(calibration_dataloader)} samples..." - ) - if not calibration_dataloader: - raise ValueError( - "Calibration data loader not set, must populate the calib_data field of" - " CompressionSession to run the SmoothQuant modifier" - ) - - with calibration_forward_context(model): - run_calibration_forward( - model, - calibration_dataloader, - self.num_calibration_steps, - self.calibration_function, - ) - - # remove the hooks now that we are done calibrating - self.remove_hooks() - @torch.no_grad() def _apply_smoothing(self, model: Module): """ @@ -276,8 +261,11 @@ def _apply_smoothing(self, model: Module): This modifies the weights of the model in-place. 
""" - logger.info("Smoothing activation scales...") for mapping in self.resolved_mappings_: + if mapping.smooth_name not in self.scales_: + continue + logger.info(f"Smoothing with {mapping.smooth_name}") + activation_scales = ( # get dynamic range for each activation channel self.scales_[mapping.smooth_name].max_channel_vals - self.scales_[mapping.smooth_name].min_channel_vals @@ -312,6 +300,9 @@ def smooth(module): smooth(layer) smooth(smooth_layer) + # clear calibration data + del self.scales_[mapping.smooth_name] + def _calculate_smoothing_scales( self, balance_layers: List[Module], activation_scales: torch.Tensor ) -> List[float]: diff --git a/src/llmcompressor/modifiers/stage.py b/src/llmcompressor/modifiers/stage.py index 75a11ffc50..ae27fdbfe8 100644 --- a/src/llmcompressor/modifiers/stage.py +++ b/src/llmcompressor/modifiers/stage.py @@ -80,7 +80,8 @@ def initialize(self, state: "State", **kwargs): accelerator = kwargs.get("accelerator", None) for modifier in self.modifiers: - modifier.initialize(state, **kwargs) + if not modifier.initialized: + modifier.initialize(state, **kwargs) if accelerator: accelerator.wait_for_everyone() state.loggers.system.info(tag="stage", string="Modifiers initialized") diff --git a/src/llmcompressor/pipelines/basic/pipeline.py b/src/llmcompressor/pipelines/basic/pipeline.py index 13a1c9454c..593745e83c 100644 --- a/src/llmcompressor/pipelines/basic/pipeline.py +++ b/src/llmcompressor/pipelines/basic/pipeline.py @@ -1,24 +1,25 @@ -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING import torch -import torch.utils.data.dataloader import tqdm from compressed_tensors.utils import get_execution_device +from torch.utils.data.dataloader import DataLoader +from llmcompressor.core import LifecycleCallbacks, active_session from llmcompressor.modifiers.utils.pytorch_helpers import apply_pad_mask_to_batch from llmcompressor.pytorch.utils.helpers import tensors_to_device from llmcompressor.utils.helpers import 
calibration_forward_context if TYPE_CHECKING: - from llmcompressor.modifiers import Modifier + from llmcompressor.args.dataset_arguments import DatasetArguments __all__ = ["run_pipeline"] def run_pipeline( model: torch.nn.Module, - dataloader: torch.utils.data.DataLoader, - callback_modifier: Optional["Modifier"] = None, + dataloader: DataLoader, + dataset_args: "DatasetArguments", ): """ Run a basic data pipeline. @@ -30,16 +31,17 @@ def run_pipeline( :param model: model being calibrated :param dataloader: loads data for calibration - :param callback_modifier: Temporary HACK which should be replaced by event callback + :param modifiers: list of modifiers, only included to match PipelineFn signature """ + session = active_session() model_device = get_execution_device(model) + session.initialize() + with calibration_forward_context(model): for batch in tqdm.tqdm(dataloader, desc="Calibrating"): batch = apply_pad_mask_to_batch(batch) batch = tensors_to_device(batch, model_device) model(**batch) - # TODO: replace with a lifecycle event - if callback_modifier: - callback_modifier.on_sequential_batch_end() + LifecycleCallbacks.calibration_epoch_end() diff --git a/src/llmcompressor/pipelines/data_free/__init__.py b/src/llmcompressor/pipelines/data_free/__init__.py new file mode 100644 index 0000000000..fc60475ca8 --- /dev/null +++ b/src/llmcompressor/pipelines/data_free/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa +from .pipeline import run_pipeline diff --git a/src/llmcompressor/pipelines/data_free/pipeline.py b/src/llmcompressor/pipelines/data_free/pipeline.py new file mode 100644 index 0000000000..13a6590de5 --- /dev/null +++ b/src/llmcompressor/pipelines/data_free/pipeline.py @@ -0,0 +1,24 @@ +from typing import TYPE_CHECKING + +import torch +from torch.utils.data.dataloader import DataLoader + +from llmcompressor.core.session_functions import LifecycleCallbacks, active_session + +if TYPE_CHECKING: + from llmcompressor.args.dataset_arguments import DatasetArguments + 
+__all__ = ["run_pipeline"] + + +def run_pipeline( + model: torch.nn.Module, + dataloader: DataLoader, + dataset_args: "DatasetArguments", +): + """ + A pipeline for data-free calibration + """ + session = active_session() + session.initialize() + LifecycleCallbacks.calibration_epoch_end() diff --git a/src/llmcompressor/pipelines/independent/__init__.py b/src/llmcompressor/pipelines/independent/__init__.py new file mode 100644 index 0000000000..fc60475ca8 --- /dev/null +++ b/src/llmcompressor/pipelines/independent/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa +from .pipeline import run_pipeline diff --git a/src/llmcompressor/pipelines/independent/pipeline.py b/src/llmcompressor/pipelines/independent/pipeline.py new file mode 100644 index 0000000000..486bf6d41a --- /dev/null +++ b/src/llmcompressor/pipelines/independent/pipeline.py @@ -0,0 +1,40 @@ +from typing import TYPE_CHECKING + +import torch +from loguru import logger +from torch.utils.data.dataloader import DataLoader + +from llmcompressor.core import active_session +from llmcompressor.modifiers.stage import StageModifiers +from llmcompressor.utils.helpers import patch_attr + +if TYPE_CHECKING: + from llmcompressor.args.dataset_arguments import DatasetArguments + +__all__ = ["run_pipeline"] + + +def run_pipeline( + model: torch.nn.Module, + dataloader: DataLoader, + dataset_args: "DatasetArguments", +): + # avoid circular import + from llmcompressor.pipelines.registry import get_pipeline_fn + + session = active_session() + + modifiers = session.get_modifiers() + with patch_attr(session.lifecycle, "modifiers", None): + for index, modifier in enumerate(modifiers): + mod_type = str(type(modifier).__name__) + session.lifecycle.modifiers = [ + StageModifiers(modifiers=[modifier], group=mod_type, index=index) + ] + + pipeline, pipeline_fn = get_pipeline_fn(user=None, modifiers=[modifier]) + logger.info(f"Inferred `{pipeline}` calibration pipeline for `{mod_type}`") + + pipeline_fn(model, dataloader, dataset_args) + + 
# restore modifiers on exit for proper model compression inference from recipe diff --git a/src/llmcompressor/pipelines/layer_sequential/pipeline.py b/src/llmcompressor/pipelines/layer_sequential/pipeline.py index 6f7ec81b66..b74db0f66a 100644 --- a/src/llmcompressor/pipelines/layer_sequential/pipeline.py +++ b/src/llmcompressor/pipelines/layer_sequential/pipeline.py @@ -1,9 +1,10 @@ -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING import torch -import torch.utils.data.dataloader import tqdm +from torch.utils.data.dataloader import DataLoader +from llmcompressor.core import LifecycleCallbacks, active_session from llmcompressor.modifiers.utils.hooks import HooksMixin from llmcompressor.pipelines.cache import IntermediatesCache from llmcompressor.pipelines.layer_sequential.helpers import ( @@ -12,19 +13,18 @@ maybe_inject_pos_embeddings, to_next_layer_kwargs, ) +from llmcompressor.pipelines.sequential.helpers import get_targets_from_modifiers from llmcompressor.utils.helpers import DisableQuantization, calibration_forward_context if TYPE_CHECKING: - from llmcompressor.modifiers import Modifier + from llmcompressor.args.dataset_arguments import DatasetArguments + __all__ = ["run_pipeline"] def run_pipeline( - model: torch.nn.Module, - dataloader: torch.utils.data.DataLoader, - sequential_targets: List[str], - callback_modifier: Optional["Modifier"] = None, + model: torch.nn.Module, dataloader: DataLoader, dataset_args: "DatasetArguments" ): """ Run a layer-wise sequential data pipeline according to the following steps: @@ -48,9 +48,14 @@ def run_pipeline( :param sequential_targets: patterns which match to the layer modules of the model :param callback_modifier: Temporary HACK which should be replaced by event callback """ + session = active_session() + # find layers + modifiers = session.get_modifiers() + sequential_targets, _ = get_targets_from_modifiers(modifiers, model) layers = match_modules(model, sequential_targets) + 
session.initialize() with calibration_forward_context(model), DisableQuantization(model): # prepare intermediates cache intermediates: IntermediatesCache = capture_first_layer_intermediates( @@ -68,9 +73,8 @@ def run_pipeline( inputs = intermediates.fetch(batch_index) layer(**inputs) - # TODO: replace with a lifecycle event - if callback_modifier: - callback_modifier.on_sequential_batch_end() + # trigger compression + LifecycleCallbacks.sequential_epoch_end() # this pass does not trigger modifier hooks # and is only used for capturing outputs from the newly compressed modules @@ -86,3 +90,6 @@ def run_pipeline( intermediates.delete(batch_index) intermediates.update(batch_index, output) + + # redudant, finish any remaining compression + LifecycleCallbacks.calibration_epoch_end() diff --git a/src/llmcompressor/pipelines/registry.py b/src/llmcompressor/pipelines/registry.py new file mode 100644 index 0000000000..4debfb24f2 --- /dev/null +++ b/src/llmcompressor/pipelines/registry.py @@ -0,0 +1,82 @@ +from typing import Dict, List, Optional, Tuple + +from loguru import logger + +from llmcompressor.modifiers import Modifier +from llmcompressor.modifiers.obcq.sgpt_mixin import SparsityModifierMixin +from llmcompressor.modifiers.quantization import GPTQModifier, QuantizationMixin +from llmcompressor.modifiers.smoothquant import SmoothQuantModifier +from llmcompressor.pipelines import ( + basic, + data_free, + independent, + layer_sequential, + sequential, +) +from llmcompressor.typing import PipelineFn + +__all__ = ["PIPELINES", "get_pipeline_fn"] + +SEQUENTIAL_MODIFIERS = (GPTQModifier, SparsityModifierMixin) + +PIPELINES: Dict[str, PipelineFn] = { + "sequential": sequential.run_pipeline, + "layer_sequential": layer_sequential.run_pipeline, + "basic": basic.run_pipeline, + "independent": independent.run_pipeline, + "data_free": data_free.run_pipeline, +} + + +def get_pipeline_fn( + user: Optional[str], modifiers: List[Modifier] +) -> Tuple[str, PipelineFn]: + 
inferred_pipeline = infer_pipeline_fn(modifiers) + + if user is not None and user != inferred_pipeline and user != "independent": + logger.warning( + f"Calibration pipeline is set to `{user}`, but it is recommend to " + f"use `{inferred_pipeline}`" + ) + + pipeline = user or inferred_pipeline + + if pipeline not in PIPELINES: + raise ValueError( + f"Cannot find `{pipeline}` in registered pipelines {PIPELINES.keys()}" + ) + + return pipeline, PIPELINES[pipeline] + + +def infer_pipeline_fn(modifiers: List[Modifier]) -> str: + if any(isinstance(modifier, SEQUENTIAL_MODIFIERS) for modifier in modifiers): + return "sequential" + + quant_modifiers = _get_quantization_modifiers(modifiers) + if len(quant_modifiers) > 1: + raise ValueError( + f"Recipe contains more than one quantization modifier ({quant_modifiers})." + "Please modify your recipe to use at most one quantization modifier" + ) + + if len(quant_modifiers) == 1: + quant_modifier = quant_modifiers[0] + config = quant_modifier.resolve_quantization_config() + if config.requires_calibration_data(): + return "basic" + else: + return "data_free" + + if any(isinstance(modifier, SmoothQuantModifier) for modifier in modifiers): + return "basic" + + return "data_free" + + +def _get_quantization_modifiers(modifiers: List[Modifier]) -> List[QuantizationMixin]: + return [ + modifier + for modifier in modifiers + if isinstance(modifier, QuantizationMixin) and modifier.has_config() + ] diff --git a/src/llmcompressor/pipelines/sequential/__init__.py b/src/llmcompressor/pipelines/sequential/__init__.py index fc60475ca8..6de607ce4e 100644 --- a/src/llmcompressor/pipelines/sequential/__init__.py +++ b/src/llmcompressor/pipelines/sequential/__init__.py @@ -1,2 +1,2 @@ # flake8: noqa -from .pipeline import run_pipeline +from .pipeline import get_targets_from_modifiers, run_pipeline diff --git a/src/llmcompressor/pipelines/sequential/helpers.py b/src/llmcompressor/pipelines/sequential/helpers.py index 125517c1a6..589cfd7a1f 100644 
--- a/src/llmcompressor/pipelines/sequential/helpers.py +++ b/src/llmcompressor/pipelines/sequential/helpers.py @@ -5,7 +5,8 @@ from compressed_tensors import has_offloaded_params from compressed_tensors.quantization import find_name_or_class_matches -from torch.fx import Graph, GraphModule, Node +from loguru import logger +from torch.fx import Graph, GraphModule, Node, _symbolic_trace from torch.fx.graph import PythonCode from torch.fx.proxy import Argument from torch.nn import Module @@ -13,10 +14,12 @@ from transformers.configuration_utils import PretrainedConfig from transformers.utils.fx import HFTracer +from llmcompressor.modifiers import Modifier from llmcompressor.modifiers.utils.hooks import HooksMixin from llmcompressor.utils.helpers import calibration_forward_context, patch_attr +from llmcompressor.utils.pytorch.module import get_no_split_params -__all__ = ["trace_subgraphs", "Subgraph"] +__all__ = ["trace_subgraphs", "Subgraph", "get_targets_from_modifiers"] @dataclass @@ -77,15 +80,15 @@ def trace_subgraphs( :param sample_input: inputs whose values will change during execution but whose __len__, __bool__, and __contains__ values are assumed constant across batches :param sequential_targets: list of patterns matching sequential targets - :param ignore: list of patterns matching modules to ignore during tracing + :param ignore: list of module method patterns to skip during tracing :return: a list of Subgraphs in order of execution """ # find modules sequential_targets = match_modules(model, sequential_targets) - ignore = match_modules(model, ignore) + add_autowrap_methods(model, ignore) # initialize arguments - tracer = get_tracer(model, sequential_targets, ignore) + tracer = get_tracer(model, sequential_targets) concrete_args = populate_concrete_args(model, sample_input) # trace @@ -115,23 +118,31 @@ def trace_subgraphs( return subgraphs -def get_tracer( - model: Module, sequential_targets: Set[Module], ignore: Set[Module] -) -> HFTracer: +def 
get_tracer(model: Module, sequential_targets: Set[Module]) -> HFTracer: """ Get a tracer specialized for the given model. The resulting tracer will not trace - inside of sequential targets, ignored targets, or offloaded modules. + inside of sequential targets, nor any modules which are not call graph ancestors of + sequential targets - Tracing within sequential targets and ignored targets is unnecessary, and tracing - within offloaded modules may result in meta tensors being added to the model graph + Tracing within sequential targets is unnecessary, and tracing within offloaded + modules may result in meta tensors being added to the model graph :param model: model being traced :param sequential_targets: modules which are sequential targets - :param ignore: modules which are ignored """ - # TODO: redefine skip_trace_modules to all non-ancestors of sequential_targets + sequential_ancestors = get_sequential_ancestors(model, sequential_targets) offloaded_modules = set(m for m in model.modules() if has_offloaded_params(m)) - skip_trace_modules = sequential_targets | offloaded_modules | ignore + + # check unlikely case that ancestors have direct params which are offloaded + offloaded_ancestors = offloaded_modules & sequential_ancestors + if offloaded_ancestors: + names = set(module.__class__.__name__ for module in offloaded_ancestors) + logger.warning( + "The following modules are call graph ancestors of sequential targets," + f"but also contain offloaded modules: {names}.\n" + "These modules will not be traced, and any sequential target children will " + "be executed jointly, which may lead to OOM errors" + ) class SequentialTracer(HFTracer): def create_arg(self, a: Any) -> Argument: @@ -144,9 +155,7 @@ def create_arg(self, a: Any) -> Argument: return super().create_arg(a) def is_leaf_module(self, module: Module, module_qualified_name: str) -> bool: - return module in skip_trace_modules or super().is_leaf_module( - module, module_qualified_name - ) + return module not 
in sequential_ancestors or module in offloaded_modules def trace(self, root: Union[Module, Callable], *args, **kwargs) -> Graph: if isinstance(root, Module): @@ -396,7 +405,119 @@ def match_modules(model: Module, target_names: List[str]) -> Set[Module]: ) +def get_targets_from_modifiers( + modifiers: List[Modifier], model: PreTrainedModel +) -> List[str]: + """ + Infer sequential targets and ignore list from modifiers list + + :param model: model being calibrated + :param modifiers: list of modifiers being applied during calibration + :return: list of sequential targets + """ + # avoid circular import + from llmcompressor.pipelines.registry import SEQUENTIAL_MODIFIERS + + sequential_modifiers = [ + modifier for modifier in modifiers if isinstance(modifier, SEQUENTIAL_MODIFIERS) + ] + + if len(sequential_modifiers) >= 2: + types = [type(modifier) for modifier in sequential_modifiers] + logger.warning( + "Cannot infer sequential targets from multiple sequential modifiers " + f"({types}). Defaulting to {types[0]}" + ) + elif len(sequential_modifiers) <= 0: + types = [type(modifier) for modifier in modifiers] + raise ValueError(f"Cannot infer sequential targets from list of {types}") + + modifier = sequential_modifiers[0] + + # infer sequential targets + if modifier.sequential_targets is None: + sequential_targets = get_no_split_params(model) + if isinstance(modifier.sequential_targets, str): + sequential_targets = [modifier.sequential_targets] + + return sequential_targets + + def add_line_numbers(text: str) -> str: lines = text.splitlines() numbered_lines = [f"{i + 1} {line}" for i, line in enumerate(lines)] return "\n".join(numbered_lines) + + +def get_sequential_ancestors(model: Module, targets: Set[Module]) -> Set[Module]: + """ + Find modules which are call graph ancestors of the given sequential targets + + :param model: model containing sequential targets + :param targets: sequential targets to find ancestors of + :return: call graph ancestors of sequential 
targets + """ + ancestors = set() + + def is_ancestor(module: Module) -> bool: + if module in ancestors or module in targets: + return True + + # eagerly compute list in order to avoid early stopping and :. missing ancestors + _is_ancestor = any([is_ancestor(child) for child in module.children()]) + if _is_ancestor: + ancestors.add(module) + + return _is_ancestor + + is_ancestor(model) + return ancestors + + +def add_autowrap_methods(model: Module, ignore: List[str]): + """ + Find wrap module methods which should be skipped during tracing + + Commonly used to wrap Model._update_causal_mask which contains complex masking logic + which is often untraceable + + :param model: model containing modules whose methods should be wrapped + :param ignore: list of module method patterns to skip during tracing + """ + module_classes = set(type(module) for module in model.modules()) + + for pattern in ignore: + num_dots = pattern.count(".") + matched_modules = [] + + if num_dots == 0: + method_name = pattern + for cls in module_classes: + if hasattr(cls, method_name): + _symbolic_trace._wrapped_methods_to_patch.append((cls, method_name)) + matched_modules.append(cls) + + elif num_dots == 1: + cls_name, method_name = pattern.split(".") + for cls in module_classes: + if cls.__name__ == cls_name and hasattr(cls, method_name): + _symbolic_trace._wrapped_methods_to_patch.append((cls, method_name)) + matched_modules.append(cls) + + else: + raise ValueError() + + if len(matched_modules) <= 0: + raise ValueError( + f"Unable to match {pattern} to any of the following module classes: " + f"{module_classes}\nPlease make sure that the method you'd like to " + "ignore exists within the model to trace. Auto-wrapping functions " + "which are not module methods is not yet supported" + ) + + if len(matched_modules) >= 2: + logger.warning( + f"Matched {pattern} to multiple module classes {matched_modules}. 
If " + "this is not intended, please ignore using the following pattern: " + "{module}.{method}" + ) diff --git a/src/llmcompressor/pipelines/sequential/pipeline.py b/src/llmcompressor/pipelines/sequential/pipeline.py index 134ad71b27..adee9ec912 100644 --- a/src/llmcompressor/pipelines/sequential/pipeline.py +++ b/src/llmcompressor/pipelines/sequential/pipeline.py @@ -1,27 +1,27 @@ -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING import torch -import torch.utils.data.dataloader import tqdm from compressed_tensors.utils import get_execution_device +from torch.utils.data.dataloader import DataLoader +from llmcompressor.core import LifecycleCallbacks, active_session from llmcompressor.modifiers.utils.hooks import HooksMixin from llmcompressor.pipelines.cache import IntermediatesCache -from llmcompressor.pipelines.sequential.helpers import trace_subgraphs +from llmcompressor.pipelines.sequential.helpers import ( + get_targets_from_modifiers, + trace_subgraphs, +) from llmcompressor.utils.helpers import DisableQuantization, calibration_forward_context if TYPE_CHECKING: - from llmcompressor.modifiers import Modifier + from llmcompressor.args.dataset_arguments import DatasetArguments __all__ = ["run_pipeline"] def run_pipeline( - model: torch.nn.Module, - dataloader: torch.utils.data.DataLoader, - sequential_targets: List[str], - ignore: List[str], - callback_modifier: Optional["Modifier"] = None, + model: torch.nn.Module, dataloader: DataLoader, dataset_args: "DatasetArguments" ): """ Run a sequential data pipeline according to the following steps: @@ -44,12 +44,20 @@ def run_pipeline( :param model: model being calibrated :param dataloader: loads data for calibration :param sequential_targets: patterns which match to the layer modules of the model - :param ignore: patterns which match to modules which should be ignored by tracing + :param ignore: list of module method patterns to skip during tracing """ + session = active_session() + + 
# infer sequential targets + modifiers = session.get_modifiers() + sequential_targets = get_targets_from_modifiers(modifiers, model) + ignore = dataset_args.tracing_ignore + # trace subgraphs sample_input = next(iter(dataloader)) subgraphs = trace_subgraphs(model, sample_input, sequential_targets, ignore) + session.initialize() with calibration_forward_context(model), DisableQuantization(model): # prepare intermediates cache model_device = get_execution_device(model) @@ -66,9 +74,8 @@ def run_pipeline( inputs = intermediates.fetch(batch_index, subgraph.input_names) subgraph.forward(model, **inputs) - # TODO: replace with a lifecycle event - if callback_modifier: - callback_modifier.on_sequential_batch_end() + # trigger compression + LifecycleCallbacks.sequential_epoch_end() # this pass does not trigger modifier hooks # and is only used for capturing outputs from the newly compressed modules @@ -80,3 +87,6 @@ def run_pipeline( if subgraph_index < num_subgraphs - 1: intermediates.update(batch_index, output) intermediates.delete(batch_index, subgraph.consumed_names) + + # redudant, finish any remaining compression + LifecycleCallbacks.calibration_epoch_end() diff --git a/src/llmcompressor/typing.py b/src/llmcompressor/typing.py index 1050f7138b..6ee001f66e 100644 --- a/src/llmcompressor/typing.py +++ b/src/llmcompressor/typing.py @@ -1,13 +1,18 @@ -from typing import Union +from typing import TYPE_CHECKING, Callable, Union +import torch from datasets import Dataset, DatasetDict, IterableDataset from transformers import ( BaseImageProcessor, FeatureExtractionMixin, + PreTrainedModel, PreTrainedTokenizer, ProcessorMixin, ) +if TYPE_CHECKING: + from llmcompressor.args.dataset_arguments import DatasetArguments + # Tokenizer or Processor. 
Processors do not inherit from a unified base class Processor = Union[ PreTrainedTokenizer, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin @@ -15,3 +20,8 @@ # Supported dataset types, IterableDataset is a streamed dataset DatasetType = Union[Dataset, DatasetDict, IterableDataset] + +# Pipeline callable +PipelineFn = Callable[ + [PreTrainedModel, torch.utils.data.DataLoader, "DatasetArguments"], None +] diff --git a/tests/llmcompressor/modifiers/calibration/test_kv_cache.py b/tests/llmcompressor/modifiers/calibration/test_kv_cache.py index 25b8468f41..b22e7ec401 100644 --- a/tests/llmcompressor/modifiers/calibration/test_kv_cache.py +++ b/tests/llmcompressor/modifiers/calibration/test_kv_cache.py @@ -25,7 +25,7 @@ calibrate_kv_cache_input_hook, calibrate_kv_cache_output_hook, freeze_module_quantization, - set_unset_kv_cache, + initialize_quantized_kv_cache, ) config = { @@ -75,7 +75,7 @@ def test_kv_cache_quantization(config): config = QuantizationConfig(**config) config.quantization_status = QuantizationStatus.CALIBRATION apply_quantization_config(model, config) - model.apply(set_unset_kv_cache) + model.apply(initialize_quantized_kv_cache) model.apply(_prep_for_calibration) with torch.no_grad(): diff --git a/tests/llmcompressor/modifiers/quantization/test_base.py b/tests/llmcompressor/modifiers/quantization/test_base.py index 11e630c191..2a8c58ea46 100644 --- a/tests/llmcompressor/modifiers/quantization/test_base.py +++ b/tests/llmcompressor/modifiers/quantization/test_base.py @@ -2,7 +2,6 @@ import pytest -from llmcompressor.core.events import Event from llmcompressor.modifiers.factory import ModifierFactory from llmcompressor.modifiers.quantization import QuantizationModifier from tests.llmcompressor.modifiers.conf import setup_modifier_factory @@ -25,51 +24,3 @@ def test_quantization_registered(self): ) self.assertIsInstance(quant_obj, QuantizationModifier) - - -@pytest.mark.unit -class TestEndEpochs(unittest.TestCase): - def setUp(self): - 
self.start = 0.0 - self.scheme = dict( - input_activations=dict(num_bits=8, symmetric=True), - weights=dict(num_bits=6, symmetric=False), - ) - - def test_end_epochs(self): - disable_quant_epoch = None - obj_modifier = QuantizationModifier( - start=self.start, - scheme=self.scheme, - disable_quantization_observer_epoch=disable_quant_epoch, - config_groups={}, - ) - - self.assertEqual(obj_modifier.calculate_disable_observer_epoch(), -1) - - for epoch in range(3): - event = Event(steps_per_epoch=1, global_step=epoch) - assert not obj_modifier.check_should_disable_observer(event) - - disable_quant_epoch = 3.5 - obj_modifier = QuantizationModifier( - start=self.start, - scheme=self.scheme, - disable_quantization_observer_epoch=disable_quant_epoch, - config_groups={}, - ) - - self.assertEqual( - obj_modifier.calculate_disable_observer_epoch(), disable_quant_epoch - ) - - for epoch in range(4): - event = Event(steps_per_epoch=1, global_step=epoch) - assert not obj_modifier.check_should_disable_observer(event) - - event = Event(steps_per_epoch=1, global_step=4) - assert obj_modifier.check_should_disable_observer(event) - - for epoch in range(5, 8): - event = Event(steps_per_epoch=1, global_step=epoch) - assert obj_modifier.check_should_disable_observer(event) diff --git a/tests/llmcompressor/pipelines/sequential/test_helpers.py b/tests/llmcompressor/pipelines/sequential/test_helpers.py new file mode 100644 index 0000000000..e098035ea2 --- /dev/null +++ b/tests/llmcompressor/pipelines/sequential/test_helpers.py @@ -0,0 +1,60 @@ +import pytest +import torch +from torch.fx import _symbolic_trace + +from llmcompressor.pipelines.sequential.helpers import ( + add_autowrap_methods, + get_sequential_ancestors, +) +from llmcompressor.utils.helpers import patch_attr + + +class DummyModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.seq = torch.nn.Sequential(torch.nn.Linear(10, 20), torch.nn.ReLU()) + self.fc = torch.nn.Linear(20, 5) + + def forward(self, x): 
+ x = self.seq(x) + return self.fc(x) + + +@pytest.fixture(scope="module") +def model(): + return DummyModel() + + +def test_get_sequential_ancestors(model): + assert get_sequential_ancestors(model, set()) == set() + assert get_sequential_ancestors(model, {model}) == set() + assert get_sequential_ancestors(model, {model.fc}) == {model} + assert get_sequential_ancestors(model, {model.seq[0]}) == {model, model.seq} + assert get_sequential_ancestors(model, {model.seq[1]}) == {model, model.seq} + + +def test_add_autowrap_methods(model): + with patch_attr(_symbolic_trace, "_wrapped_methods_to_patch", []): + add_autowrap_methods(model, ["ReLU.forward"]) + assert _get_matched_modules() == {torch.nn.ReLU} + + with patch_attr(_symbolic_trace, "_wrapped_methods_to_patch", []): + add_autowrap_methods(model, ["Linear.forward"]) + assert _get_matched_modules() == {torch.nn.Linear} + + with patch_attr(_symbolic_trace, "_wrapped_methods_to_patch", []): + add_autowrap_methods(model, ["pop"]) + assert _get_matched_modules() == {torch.nn.Sequential} + + with patch_attr(_symbolic_trace, "_wrapped_methods_to_patch", []): + add_autowrap_methods(model, ["forward"]) + assert _get_matched_modules() == { + DummyModel, + torch.nn.Sequential, + torch.nn.Linear, + torch.nn.ReLU, + } + + +def _get_matched_modules(): + return set(module for module, _ in _symbolic_trace._wrapped_methods_to_patch) diff --git a/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py b/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py index ab63a5414d..14fd7dcb89 100644 --- a/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py +++ b/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py @@ -1,13 +1,11 @@ import unittest import pytest -from compressed_tensors.quantization import QuantizationScheme +import torch from parameterized import parameterized from llmcompressor.modifiers.obcq import SparseGPTModifier from 
llmcompressor.modifiers.quantization.gptq import GPTQModifier -from llmcompressor.modifiers.quantization.quantization import QuantizationModifier -from llmcompressor.utils.pytorch.module import qat_active from tests.llmcompressor.modifiers.conf import ( LifecyleTestingHarness, setup_modifier_factory, @@ -62,50 +60,26 @@ def test_successful_layerwise_recipe(self): @pytest.mark.unit -class TestCreateDefaultQuantModifier(unittest.TestCase): +class TestApplyQuantization(unittest.TestCase): def setUp(self): setup_modifier_factory() def test_create_default_quant_modifier(self): - modifier = GPTQModifier(block_size=128) - assert modifier._quantization_modifier is None - - testing_harness = LifecyleTestingHarness(model=LinearNet()) - modifier._check_build_quant_modifier(testing_harness.get_state().model) - assert modifier.quantize - assert isinstance(modifier._quantization_modifier, QuantizationModifier) - modifier._quantization_modifier.create_init_config() - default_config_group_name = "group_0" - should_be_default_quant_scheme = modifier._quantization_modifier.config_groups[ - default_config_group_name - ] - assert should_be_default_quant_scheme.input_activations is None - assert should_be_default_quant_scheme.weights is None - - -@pytest.mark.unit -class TestSetQuantIfModifierAlreadyExists(unittest.TestCase): - def setUp(self): - setup_modifier_factory() + modifier = GPTQModifier(block_size=128, targets=["Linear"], scheme="FP8") - def test_set_quant_if_modifer_already_exists(self): - model = LinearNet() - scheme = QuantizationScheme( - targets=["Linear"], - input_activations=dict(num_bits=8, symmetric=True), - weights=dict(num_bits=4, symmetric=False), - ) - - modifier = QuantizationModifier(config_groups={"group_0": scheme}) - testing_harness = LifecyleTestingHarness(model=model, start=-1) - - assert not qat_active(testing_harness.get_state().model) + testing_harness = LifecyleTestingHarness(model=LinearNet(), start=-1) modifier.initialize(testing_harness.get_state()) 
- assert qat_active(testing_harness.get_state().model) - modifier = GPTQModifier(block_size=128) - assert not modifier._quantization_modifier - assert modifier.quantize + model = testing_harness.state.model + for module in model.modules(): + if isinstance(module, torch.nn.Linear): + assert hasattr(module, "quantization_scheme") + assert hasattr(module, "input_observer") + assert hasattr(module, "weight_observer") + pre_hooks = list(module._forward_pre_hooks.values()) + post_hooks = list(module._forward_hooks.values()) + assert pre_hooks[0].__name__ == "calibrate_input_hook" + assert post_hooks[0].__name__ == "calibrate_module" class TestSetQuantInGPTQ(unittest.TestCase): @@ -131,24 +105,17 @@ def setUp(self): } } } - self.quant_config = {"QuantizationModifier": self.quant_kwargs} def test_set_quant_in_gptq(self): - modifier = GPTQModifier(block_size=128, quantize=self.quant_config) - assert modifier._quantization_modifier is None - - testing_harness = LifecyleTestingHarness(model=LinearNet()) - modifier._check_build_quant_modifier(testing_harness.get_state().model) - assert modifier.quantize - self.assertIsInstance(modifier._quantization_modifier, QuantizationModifier) + modifier = GPTQModifier(block_size=128, **self.quant_kwargs) + config = modifier.resolve_quantization_config() - dict_scheme = dict(modifier._quantization_modifier.config_groups) self._check_config( - dict(dict_scheme["config_group_0"].weights), + dict(config.config_groups["config_group_0"].weights), self.quant_kwargs["config_groups"]["config_group_0"]["weights"], ) self._check_config( - dict(dict_scheme["config_group_0"].input_activations), + dict(config.config_groups["config_group_0"].input_activations), self.quant_kwargs["config_groups"]["config_group_0"]["input_activations"], ) diff --git a/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py b/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py index dafedbfa3b..1e72eb7183 100644 --- 
a/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py +++ b/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py @@ -222,7 +222,6 @@ def test_kv_cache_gptq_model_state_dict_attr(kv_cache_fixture, tmp_path): dynamic: {dynamic} symmetric: {symmetric} GPTQModifier: - sequential_update: false ignore: ["lm_head"] config_groups: group_0: diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py b/tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py index f87cae28d2..268a38f68a 100644 --- a/tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py +++ b/tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py @@ -44,7 +44,7 @@ def test_no_lm_head_target(self): state = State() state.update(model=self.model, device=self.device, calib_data=self.dataloader) - modifier.on_initialize(state) + modifier.initialize(state) assert len(self.model.lm_head._forward_hooks) <= 0 @@ -56,7 +56,7 @@ def test_lm_head_target(self): state = State() state.update(model=self.model, device=self.device, calib_data=self.dataloader) - modifier.on_initialize(state) + modifier.initialize(state) assert len(self.model.lm_head._forward_hooks) == 1