feat(logging): add verbose mode for timed progress messages

johnmarktaylor91 · claude · johnmarktaylor91 · commit 0603f1035b8b · 2026-03-08T14:59:33.000-04:00
Add `verbose: bool = False` parameter to `log_forward_pass`,
`show_model_graph`, and internal pipeline functions. When enabled, prints
`[torchlens]`-prefixed progress at each major pipeline stage with timing.
Also fixes `_trim_and_reorder_model_history_fields` to preserve all
non-ordered attributes (not just private ones).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/torchlens/capture/trace.py b/torchlens/capture/trace.py
@@ -49,6 +49,7 @@
 from ..utils.arg_handling import safe_copy_args, safe_copy_kwargs, normalize_input_args
 from .source_tensors import log_source_tensor
 from ..data_classes.interface import _give_user_feedback_about_lookup_key
+from ..utils.display import _vprint, _vtimed
 
 
 def save_new_activations(
@@ -133,6 +134,7 @@ def save_new_activations(
         self._raw_layer_dict.pop(label, None)
 
     # Now run and log the new inputs.
+    _vprint(self, "Running fast pass (saving requested activations)")
     self._run_and_log_inputs_through_model(
         model, input_args, input_kwargs, layers_to_save, random_seed
     )
@@ -431,12 +433,23 @@ def run_and_log_inputs_through_model(
         # Per-session model preparation
         _prepare_model_session(self, model, self._optimizer)
         self.elapsed_time_setup = time.time() - self.pass_start_time
+        _vprint(self, f"Model prepared ({self.elapsed_time_setup:.2f}s)")
+
+        # Print input summary
+        if getattr(self, "verbose", False):
+            devices = set()
+            for t in input_tensors:
+                if hasattr(t, "device"):
+                    devices.add(str(t.device))
+            device_str = ", ".join(sorted(devices)) if devices else "unknown"
+            _vprint(self, f"Inputs: {len(input_tensors)} tensor(s) on {device_str}")
 
         # Turn on the logging toggle and run the forward pass.
         # Inside this context, every decorated torch function will log its
         # inputs/outputs.  Source tensors (model inputs) are logged explicitly
         # before invoking the model; all subsequent operations are captured
         # automatically by the decorated wrappers.
+        _vprint(self, f"Running {self.logging_mode} forward pass...")
         with _state.active_logging(self):
             for i, t in enumerate(input_tensors):
                 log_source_tensor(self, t, "input", input_tensor_addresses[i])
@@ -446,10 +459,16 @@ def run_and_log_inputs_through_model(
         self.elapsed_time_forward_pass = (
             time.time() - self.pass_start_time - self.elapsed_time_setup
         )
+        _vprint(
+            self,
+            f"Forward pass complete ({self.elapsed_time_forward_pass:.2f}s, "
+            f"{len(self._raw_layer_dict)} raw operations)",
+        )
 
         output_tensors, output_tensor_addresses = _extract_and_mark_outputs(self, outputs)
 
         _cleanup_model_session(model, input_tensors)
+        _vprint(self, f"Postprocessing {len(self._raw_layer_dict)} operations...")
         self._postprocess(output_tensors, output_tensor_addresses)
 
     except Exception as e:
diff --git a/torchlens/data_classes/model_log.py b/torchlens/data_classes/model_log.py
@@ -112,6 +112,7 @@ def __init__(
         save_source_context: bool = False,
         save_rng_states: bool = False,
         detect_loops: bool = True,
+        verbose: bool = False,
     ):
         """Initialise a fresh ModelLog for a new logging session.
 
@@ -130,6 +131,7 @@ def __init__(
                 around each function call (used by FuncCallLocation).
             optimizer: Optional torch optimizer, used to annotate which params
                 have optimizers attached.
+            verbose: If True, print timed progress messages at each major pipeline stage.
         """
         # Callables are effectively immutable — deepcopy is unnecessary.
 
@@ -159,6 +161,7 @@ def __init__(
         self.save_source_context = save_source_context
         self.save_rng_states = save_rng_states
         self.detect_loops = detect_loops
+        self.verbose = verbose
         self.has_saved_gradients = False
         self.mark_input_output_distances = mark_input_output_distances
 
diff --git a/torchlens/postprocess/__init__.py b/torchlens/postprocess/__init__.py
@@ -69,6 +69,8 @@
 if TYPE_CHECKING:
     from ..data_classes.model_log import ModelLog
 
+from ..utils.display import _vprint, _vtimed
+
 
 def postprocess(
     self: "ModelLog", output_tensors: List[torch.Tensor], output_tensor_addresses: List[str]
@@ -97,89 +99,83 @@ def postprocess(
         _set_pass_finished(self)
         return
 
-    # Step 1: Add dedicated output nodes
-
-    _add_output_layers(self, output_tensors, output_tensor_addresses)
-
-    # Step 2: Trace which nodes are ancestors of output nodes
+    # Steps 1-3: Graph traversal (output nodes, ancestry, orphan removal)
+    with _vtimed(self, "Steps 1-3: Graph traversal"):
+        # Step 1: Add dedicated output nodes
+        _add_output_layers(self, output_tensors, output_tensor_addresses)
 
-    _find_output_ancestors(self)
+        # Step 2: Trace which nodes are ancestors of output nodes
+        _find_output_ancestors(self)
 
-    # Step 3: Remove orphan nodes, find nodes that don't terminate in output node
-
-    _remove_orphan_nodes(self)
+        # Step 3: Remove orphan nodes, find nodes that don't terminate in output node
+        _remove_orphan_nodes(self)
 
     # Step 4: Find min/max distance from input and output nodes.
     # Conditional: only runs when the user requested distance metadata.
-
     if self.mark_input_output_distances:
-        _mark_input_output_distances(self)
+        with _vtimed(self, "Step 4: Input/output distances"):
+            _mark_input_output_distances(self)
 
-    # Step 5: Starting from terminal single boolean tensors, mark the conditional branches.
+    # Steps 5-7: Control flow (conditional branches, module fixing, buffers)
+    with _vtimed(self, "Steps 5-7: Control flow"):
+        # Step 5: Starting from terminal single boolean tensors, mark the conditional branches.
+        _mark_conditional_branches(self)
 
-    _mark_conditional_branches(self)
+        # Step 6: Annotate the containing modules for all internally-generated tensors.
+        _fix_modules_for_internal_tensors(self)
 
-    # Step 6: Annotate the containing modules for all internally-generated tensors.
-    # Internally-initialized tensors (e.g., constants, arange results) don't know
-    # what module they belong to. This traces backward from input-descendant tensors
-    # to infer module containment. IMPORTANT: also appends module path suffixes to
-    # operation_equivalence_type, which affects Step 8 loop detection grouping.
-
-    _fix_modules_for_internal_tensors(self)
-
-    # Step 7: Fix the buffer passes and parent information.
-    # Connects buffer parents, merges duplicate buffer nodes (same module, same
-    # value, same parents), and assigns buffer pass numbers.
-
-    _fix_buffer_layers(self)
+        # Step 7: Fix the buffer passes and parent information.
+        _fix_buffer_layers(self)
 
     # Step 8: Identify all loops, mark repeated layers.
+    loop_desc = (
+        "Step 8: Loop detection (full)"
+        if self.detect_loops
+        else "Step 8: Loop detection (params only)"
+    )
+    with _vtimed(self, loop_desc):
+        if self.detect_loops:
+            _detect_and_label_loops(self)
+        else:
+            _group_by_shared_params(self)
 
-    if self.detect_loops:
-        _detect_and_label_loops(self)
-    else:
-        _group_by_shared_params(self)
+    # Steps 9-12: Labeling (label mapping, final info, rename, cleanup)
+    with _vtimed(self, "Steps 9-12: Labeling"):
+        # Step 9: Go down tensor list, get the mapping from raw tensor names to final tensor names.
+        _map_raw_labels_to_final_labels(self)
 
-    # Step 9: Go down tensor list, get the mapping from raw tensor names to final tensor names.
+        # Step 10: Log final info for all layers
+        _log_final_info_for_all_layers(self)
 
-    _map_raw_labels_to_final_labels(self)
+        # Step 11: Rename all raw labels to final labels
+        _rename_model_history_layer_names(self)
+        _trim_and_reorder_model_history_fields(self)
 
-    # Step 10: Log final info for all layers (operation numbers, module hierarchy,
-    # param tallies, structural flags). MUST run before Step 12 because lookup key
-    # generation in Step 12 needs module hierarchy data populated here.
-    _log_final_info_for_all_layers(self)
+        # Step 12: Remove unsaved layers, build lookup key mappings
+        _remove_unwanted_entries_and_log_remaining(self)
 
-    # Step 11: Rename all raw labels (e.g., "cos_3_raw") to final labels
-    # (e.g., "cos_1_3:2") in both ModelLog-level fields and LayerPassLog fields.
-    # Then reorder ModelLog fields into the canonical display order.
-    _rename_model_history_layer_names(self)
-    _trim_and_reorder_model_history_fields(self)
+    # Steps 13-18: Finalization
+    with _vtimed(self, "Steps 13-18: Finalization"):
+        # Step 13: Undecorate all saved tensors and remove saved grad_fns.
+        _undecorate_all_saved_tensors(self)
 
-    # Step 12: Remove unsaved layers (unless keep_unsaved_layers=True), build
-    # lookup key mappings, and log remaining layer metadata.
-    _remove_unwanted_entries_and_log_remaining(self)
+        # Step 14: Clear the cache after any tensor deletions for garbage collection purposes:
+        torch.cuda.empty_cache()
 
-    # Step 13: Undecorate all saved tensors and remove saved grad_fns.
-    _undecorate_all_saved_tensors(self)
+        # Step 15: Log time elapsed.
+        _log_time_elapsed(self)
 
-    # Step 14: Clear the cache after any tensor deletions for garbage collection purposes:
-    torch.cuda.empty_cache()
+        # Step 16: Populate ParamLog reverse mappings, linked params, num_passes, and gradient metadata.
+        _finalize_param_logs(self)
 
-    # Step 15: Log time elapsed.
-    _log_time_elapsed(self)
+        # Step 16.5: Build aggregate LayerLog objects from per-pass LayerPassLog entries.
+        _build_layer_logs(self)
 
-    # Step 16: Populate ParamLog reverse mappings, linked params, num_passes, and gradient metadata.
-    _finalize_param_logs(self)
-
-    # Step 16.5: Build aggregate LayerLog objects from per-pass LayerPassLog entries.
-    _build_layer_logs(self)
+        # Step 17: Build structured ModuleLog objects from raw module_* dicts.
+        _build_module_logs(self)
 
-    # Step 17: Build structured ModuleLog objects from raw module_* dicts.
-    _build_module_logs(self)
-
-    # Step 18: log the pass as finished, changing the ModelLog behavior to its user-facing version.
-
-    _set_pass_finished(self)
+        # Step 18: log the pass as finished, changing the ModelLog behavior to its user-facing version.
+        _set_pass_finished(self)
 
 
 def postprocess_fast(self: "ModelLog") -> None:
@@ -202,6 +198,7 @@ def postprocess_fast(self: "ModelLog") -> None:
     - Step 17: _build_module_logs — module structure doesn't change between
       passes and _module_build_data isn't repopulated in fast mode (#108).
     """
+    _vprint(self, "Fast-pass postprocessing...")
     # Use layer_dict_main_keys to get LayerPassLog directly (not LayerLog)
     for output_layer_label in self.output_layers:
         output_layer = self.layer_dict_main_keys[output_layer_label]
diff --git a/torchlens/postprocess/labeling.py b/torchlens/postprocess/labeling.py
@@ -584,8 +584,9 @@ def _trim_and_reorder_model_history_fields(self) -> None:
     new_dir_dict = OrderedDict()
     for field in MODEL_LOG_FIELD_ORDER:
         new_dir_dict[field] = getattr(self, field)
-    # Preserve private/internal fields not in the canonical order.
+    # Preserve all remaining fields not in the canonical order (private/internal
+    # fields AND runtime-config attributes like ``verbose``).
     for field, value in self.__dict__.items():
-        if field.startswith("_") and field not in new_dir_dict:
+        if field not in new_dir_dict:
             new_dir_dict[field] = value
     self.__dict__ = new_dir_dict
diff --git a/torchlens/user_funcs.py b/torchlens/user_funcs.py
@@ -28,7 +28,7 @@
 
 from .utils.introspection import get_vars_of_type_from_obj
 from .utils.rng import set_random_seed
-from .utils.display import warn_parallel
+from .utils.display import warn_parallel, _vprint
 from .utils.arg_handling import safe_copy_args, safe_copy_kwargs, normalize_input_args
 from .data_classes.model_log import (
     ModelLog,
@@ -83,6 +83,7 @@ def _run_model_and_save_specified_activations(
     save_source_context: bool = False,
     save_rng_states: bool = False,
     detect_loops: bool = True,
+    verbose: bool = False,
 ) -> ModelLog:
     """Run a forward pass with logging enabled, returning a populated ModelLog.
 
@@ -112,6 +113,7 @@ def _run_model_and_save_specified_activations(
         detect_loops: If True (default), run full isomorphic subgraph expansion to
             detect repeated patterns (loops). If False, only group operations that
             share the same parameters — much faster for very large graphs.
+        verbose: If True, print timed progress messages at each major pipeline stage.
 
     Returns:
         Fully-populated ModelLog.
@@ -140,6 +142,7 @@ def _run_model_and_save_specified_activations(
         save_source_context,
         save_rng_states,
         detect_loops,
+        verbose,
     )
     model_log._run_and_log_inputs_through_model(
         model, input_args, input_kwargs, layers_to_save, random_seed
@@ -179,6 +182,7 @@ def log_forward_pass(
     num_context_lines: int = 7,
     optimizer=None,
     detect_loops: bool = True,
+    verbose: bool = False,
 ) -> ModelLog:
     """Run a forward pass through *model*, log every operation, and return a ModelLog.
 
@@ -241,6 +245,7 @@ def log_forward_pass(
         random_seed: Fixed RNG seed for reproducibility with stochastic models.
         num_context_lines: Lines of source context to capture per function call.
         optimizer: Optional optimizer to annotate which params are being optimized.
+        verbose: If True, print timed progress messages at each major pipeline stage.
 
     Returns:
         A ``ModelLog`` containing layer activations (if requested) and full metadata.
@@ -279,12 +284,15 @@ def log_forward_pass(
             save_source_context=save_source_context,
             save_rng_states=save_rng_states,
             detect_loops=detect_loops,
+            verbose=verbose,
         )
     else:
         # --- TWO-PASS path ---
         # Pass 1 (exhaustive): Run with layers_to_save=None and keep_unsaved_layers=True
         # so the full graph is discovered and all layer labels are assigned.  No
         # activations are saved yet — this pass is purely for metadata/structure.
+        if verbose:
+            print("[torchlens] Two-pass mode: Pass 1 (exhaustive, metadata only)")
         model_log = _run_model_and_save_specified_activations(
             model=model,
             input_args=input_args,  # type: ignore[arg-type]
@@ -303,9 +311,11 @@ def log_forward_pass(
             save_source_context=save_source_context,
             save_rng_states=save_rng_states,
             detect_loops=detect_loops,
+            verbose=verbose,
         )
         # Pass 2 (fast): Now that layer labels exist, resolve the user's requested
         # layers and replay the model, saving only the matching activations.
+        _vprint(model_log, "Two-pass mode: Pass 2 (fast, saving requested layers)")
         model_log.keep_unsaved_layers = keep_unsaved_layers
         model_log.save_new_activations(
             model=model,
@@ -315,6 +325,14 @@ def log_forward_pass(
             random_seed=random_seed,
         )
 
+    # Print final summary.
+    _vprint(
+        model_log,
+        f"Done: {len(model_log.layer_logs)} layers, "
+        f"{model_log.num_tensors_saved} saved, "
+        f"{model_log.tensor_fsize_total_nice}",
+    )
+
     # Visualize if desired.
     if vis_opt != "none":
         model_log.render_graph(
@@ -386,6 +404,7 @@ def show_model_graph(
     vis_node_placement: str = "auto",
     random_seed: Optional[int] = None,
     detect_loops: bool = True,
+    verbose: bool = False,
 ) -> None:
     """Convenience wrapper: visualize the computational graph without saving activations.
 
@@ -428,6 +447,7 @@ def show_model_graph(
         save_gradients=False,
         random_seed=random_seed,
         detect_loops=detect_loops,
+        verbose=verbose,
     )
     # Render in a try/finally so temporary tl_ attributes on the model are
     # always cleaned up, even if Graphviz rendering raises.
diff --git a/torchlens/utils/display.py b/torchlens/utils/display.py
diff --git a/torchlens/visualization/rendering.py b/torchlens/visualization/rendering.py