Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 17 additions & 36 deletions fms_mo/aiu_addons/i8i8/i8i8_aiu_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,41 +97,22 @@ def _add_defaults_and_concat(
)


# registration of new adapter steps for each architecture
serialization.register_adapter_step("llama", "int8_qparams_aiu", _int8_qparams_aiu)
serialization.register_adapter_step(
"gpt_bigcode", "int8_qparams_aiu", _int8_qparams_aiu
)
serialization.register_adapter_step("roberta", "int8_qparams_aiu", _int8_qparams_aiu)
serialization.register_adapter_step(
"roberta_question_answering",
"int8_qparams_aiu",
_int8_qparams_aiu,
)

# registration of multi-step adapter for each architecture
serialization.register_adapter(
# registration of new adapter step and adapter for each architecture
for arch in [
"llama",
"fms_mo",
[
"hf_to_fms_names",
"hf_to_fms_rope",
"weight_fusion",
"int8_qparams_aiu",
],
)
serialization.register_adapter(
"gpt_bigcode", "fms_mo", ["hf_to_fms_names", "weight_fusion", "int8_qparams_aiu"]
)
serialization.register_adapter(
"roberta", "fms_mo", ["hf_to_fms_names", "weight_fusion", "int8_qparams_aiu"]
)
serialization.register_adapter(
"gpt_bigcode",
"granite",
"roberta",
"roberta_question_answering",
"fms_mo",
[
"hf_to_fms_names",
"weight_fusion",
"int8_qparams_aiu",
],
)
]:
serialization.register_adapter_step(arch, "int8_qparams_aiu", _int8_qparams_aiu)
if arch in ["llama", "granite"]:
steps_to_register = [
"hf_to_fms_names",
"hf_to_fms_rope",
"weight_fusion",
"int8_qparams_aiu",
]
else:
steps_to_register = ["hf_to_fms_names", "weight_fusion", "int8_qparams_aiu"]
serialization.register_adapter(arch, "fms_mo", steps_to_register)
10 changes: 5 additions & 5 deletions fms_mo/aiu_addons/i8i8/i8i8_aiu_linear.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it required for AIU to turn a "no bias" Linear layer into a Linear with a bias (whose value is torch.zeros(1))?

Copy link
Collaborator Author

@andrea-fasoli andrea-fasoli Apr 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's required by the AIU in the sense that the custom op always expects a tensor as bias (neither None nor a bool).

We could change W8A8Linear forward such that, if self.bias does not exist, we create a zero tensor on the fly, cast it to the right device, and pass it to the op. I thought that would add more overhead than just instantiating it once.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just want to make sure it's a requirement from the downstream stack. 8)

Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ def __init__(
"weight",
torch.zeros(out_features, in_features, dtype=torch.int8),
)
if bias:
self.register_buffer(
"bias", torch.zeros((out_features), dtype=torch.float16)
)

self.has_bias = bias
bias_size = out_features if self.has_bias else 1
self.register_buffer("bias", torch.zeros((bias_size), dtype=torch.float16))

if config.weight_per_channel:
w_clip_size = out_features
Expand Down Expand Up @@ -192,7 +192,7 @@ def __repr__(self) -> str:
return (
f"{self.__class__.__name__}"
f"(in={self.in_features}, out={self.out_features}, "
f"bias={self.bias is not None}, wq={self.weight_quant_type}, "
f"bias={self.has_bias}, wq={self.weight_quant_type}, "
f"aq={self.activ_quant_type}, smoothq={self.smoothquant}, "
f"op={self.aiu_op})"
)
Expand Down