decouple vision tower

kylesayrs · kylesayrs · commit e71d4bbdf7b7 · 2025-08-06T23:40:30.000Z
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/examples/quantization_w8a8_fp8/fp8_block_example.py b/examples/quantization_w8a8_fp8/fp8_block_example.py
@@ -15,9 +15,7 @@
 # In this case, we:
 #   * quantize the weights to fp8 with per channel via ptq
 #   * quantize the activations to fp8 with dynamic per token
-recipe = QuantizationModifier(
-    targets="Linear", scheme="FP8_BLOCK", ignore=["lm_head"]
-)
+recipe = QuantizationModifier(targets="Linear", scheme="FP8_BLOCK", ignore=["lm_head"])
 
 # Apply quantization.
 oneshot(model=model, recipe=recipe)
diff --git a/src/llmcompressor/pipelines/sequential/helpers.py b/src/llmcompressor/pipelines/sequential/helpers.py
@@ -277,14 +277,14 @@ def topological_partition(graph: GraphModule, targets: Set[Module]) -> List[List
     while len(queue) > 0:
         node = queue.popleft()
 
-        # assign to partition
-        partitions[partition_index].append(node)
-
         # guarantee targets are assigned to disjoint partitions
-        if node in target_nodes:
+        if node in target_nodes and len(partitions[partition_index]) > 0:
             partition_index += 1
             partitions.append([])
 
+        # assign to partition
+        partitions[partition_index].append(node)
+
         # recurse on last indegree only in order to guarantee that
         # the node is assigned to maximal partition
         for user in node.users: