
Commit cf2082e

Make changes to work with fms and aftu
Signed-off-by: Antoni Viros i Martin <[email protected]>
1 parent f05beb5 commit cf2082e

File tree

3 files changed (+7, −4 lines):
fms_mo/aiu_addons/__init__.py
fms_mo/aiu_addons/fp8/fp8_attn.py
fms_mo/aiu_addons/fp8/fp8_linear.py

fms_mo/aiu_addons/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -20,6 +20,9 @@ def _infer_quantization_config(quant_config: dict) -> dict | None:
         quant_config["config_groups"]["group_0"]["weights"]["type"] == "float"
         and quant_config["config_groups"]["group_0"]["weights"]["num_bits"] == 8
     ):
+        # First, import required FP8 linear classes from fms-mo
+        import fms_mo.aiu_addons.fp8.fp8_linear  # pylint: disable=unused-import
+        import fms_mo.aiu_addons.fp8.fp8_adapter  # pylint: disable=unused-import
         # This is used by get_linear to decide whether a linear layer
         # will be quantized or not inside the model
         def fp8_linear_type(name: str) -> str:
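The two pylint-suppressed imports are imports for side effects: loading each module is expected to register its FP8 implementation with FMS, so the names are never referenced afterwards. A minimal sketch of that pattern, using a hypothetical registry and decorator rather than the actual fms-mo API:

# Hypothetical registry; fms-mo's real registration mechanism may differ.
_LINEAR_REGISTRY: dict[str, type] = {}

def register_linear(linear_type: str):
    """Class decorator that records an implementation at import time."""
    def wrap(cls: type) -> type:
        _LINEAR_REGISTRY[linear_type] = cls
        return cls
    return wrap

@register_linear("fp8")
class FP8Linear:  # placeholder body for the sketch
    pass

# Merely importing the defining module populates the registry, which is
# why the imports above are deliberately "unused".
assert _LINEAR_REGISTRY["fp8"] is FP8Linear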

fms_mo/aiu_addons/fp8/fp8_attn.py

Lines changed: 3 additions & 3 deletions
@@ -251,9 +251,9 @@ def _spyre_scaled_paged_compute_op(
     query: torch.Tensor,
     key_cache: torch.Tensor,
     value_cache: torch.Tensor,
-    nheads: int,
-    kvheads: int,
-    p_dropout: float,
+    nheads: int,  # pylint: disable=unused-argument
+    kvheads: int,  # pylint: disable=unused-argument
+    p_dropout: float,  # pylint: disable=unused-argument
     scale_factor: Optional[float],
     **attn_kwargs,
 ) -> torch.Tensor:
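Suppressing unused-argument here, rather than dropping the parameters, keeps the op's signature compatible with the shared attention-op interface it is called through: callers pass the full argument list regardless of which backend handles it. A toy illustration of the same trade-off, with hypothetical names:

from typing import Optional

import torch

def toy_compute_op(
    query: torch.Tensor,
    nheads: int,  # pylint: disable=unused-argument
    p_dropout: float,  # pylint: disable=unused-argument
    scale_factor: Optional[float] = None,
    **attn_kwargs,
) -> torch.Tensor:
    # This backend ignores nheads and p_dropout, but the parameters must
    # remain so every registered op accepts the same argument list.
    scale = 1.0 if scale_factor is None else scale_factor
    return query * scale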

fms_mo/aiu_addons/fp8/fp8_linear.py

Lines changed: 1 addition & 1 deletion
@@ -193,7 +193,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         )
         qx = self._input_activation_quant_func_fp8(x, **input_quant_kwargs)

-       # Copied from torchao _linear_fp8_act_fp8_weight_impl
+        # Copied from torchao _linear_fp8_act_fp8_weight_impl
         # (with changes to support fp8 out)
         scaled_mm_config = Float8MMConfig(use_fast_accum=True)
         out_shape = get_out_shape(qx.shape, qweight.shape)
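For context, the torchao-style FP8 path the comment refers to ultimately bottoms out in a scaled matrix multiply. A hedged sketch of that core call, assuming a recent PyTorch where torch._scaled_mm takes per-tensor scales (the exact signature varies across versions, and this is not the fms-mo implementation):

import torch

def scaled_mm_sketch(
    qx: torch.Tensor,       # (M, K) float8_e4m3fn activations
    qweight: torch.Tensor,  # (N, K) float8_e4m3fn weights
    x_scale: torch.Tensor,  # per-tensor activation scale (float32)
    w_scale: torch.Tensor,  # per-tensor weight scale (float32)
) -> torch.Tensor:
    # _scaled_mm expects the second operand as (K, N) column-major, hence
    # the transpose; fast accumulation mirrors
    # Float8MMConfig(use_fast_accum=True) above.
    return torch._scaled_mm(
        qx,
        qweight.t(),
        scale_a=x_scale,
        scale_b=w_scale,
        out_dtype=torch.bfloat16,
        use_fast_accum=True,
    )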
