@@ -143,12 +143,12 @@ def forward(self, x):
 =======
     def __init__(
         self,
-        qweight: torch.Tensor,
+        weight: torch.Tensor,
         weight_scale: torch.Tensor,
         bias: torch.nn.Parameter,
     ):
         super().__init__()
-        self.qweight = torch.nn.Parameter(qweight, requires_grad=False)
+        self.weight = torch.nn.Parameter(weight, requires_grad=False)
         self.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
         self.bias = bias
@@ -157,7 +157,7 @@ def forward(self, x):
         output = fp8_gemm(
             A=qinput,
             A_scale=x_scale,
-            B=self.qweight,
+            B=self.weight,
             B_scale=self.weight_scale,
             bias=self.bias,
             out_dtype=x.dtype,
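Both fp8_gemm call sites pair an FP8 tensor with a per-tensor scale produced by per_tensor_quantize, which this diff references later but never shows. A minimal sketch of such a helper, assuming the common convention scale = amax / fp8_max so that dequantization is q * scale; this is an illustration, not the repository's actual implementation:

import torch

def per_tensor_quantize(tensor: torch.Tensor):
    # One scale for the whole tensor: map the observed absolute maximum
    # onto the largest value representable in FP8 (e4m3).
    finfo = torch.finfo(torch.float8_e4m3fn)
    amax = tensor.abs().max().clamp(min=1e-12)
    scale = amax / finfo.max
    qtensor = (tensor / scale).clamp(min=finfo.min, max=finfo.max)
    return qtensor.to(torch.float8_e4m3fn), scale.float()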
@@ -169,13 +169,13 @@ def forward(self, x):
 class FP8StaticLinearQuantizer(torch.nn.Module):
     def __init__(
         self,
-        qweight: torch.Tensor,
+        weight: torch.Tensor,
         weight_scale: torch.Tensor,
         bias: torch.nn.Parameter,
         quantize_output: bool = False,
     ):
         super().__init__()
-        self.qweight = torch.nn.Parameter(qweight, requires_grad=False)
+        self.weight = torch.nn.Parameter(weight, requires_grad=False)
         self.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
         self.bias = bias
         self.input_scale = None
@@ -191,7 +191,7 @@ def forward(self, x):
         output = fp8_gemm(
             A=qinput,
             A_scale=self.input_scale,
-            B=self.qweight,
+            B=self.weight,
             B_scale=self.weight_scale,
             bias=self.bias,
             out_dtype=x.dtype,
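FP8StaticLinearQuantizer initializes self.input_scale = None and must populate it during calibration forward passes before the fp8_gemm call above can use it. That observation step is not visible in this diff; a hedged sketch of one common approach, keeping the largest per-tensor scale seen across calibration batches:

def forward(self, x):
    qinput, x_scale = per_tensor_quantize(x)
    # Remember the widest activation range observed so far.
    if self.input_scale is None or x_scale > self.input_scale:
        self.input_scale = torch.nn.Parameter(x_scale, requires_grad=False)
    # ... then the fp8_gemm call shown in the hunk above.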
@@ -213,14 +213,14 @@ def forward(self, x):
 class FP8StaticLinear(torch.nn.Module):
     def __init__(
         self,
-        qweight: torch.nn.Parameter,
+        weight: torch.nn.Parameter,
         weight_scale: torch.nn.Parameter,
         bias: torch.nn.Parameter,
         input_scale: torch.nn.Parameter,
         output_scale: Optional[torch.nn.Parameter] = None,
     ):
         super().__init__()
-        self.qweight = qweight
+        self.weight = weight
         self.weight_scale = weight_scale
         self.bias = bias
         self.input_scale = input_scale
@@ -231,6 +231,7 @@ def forward(self, x):
         output = fp8_gemm(
             A=qinput,
             A_scale=self.input_scale,
+<<<<<<< HEAD
             B=self.qweight,
 >>>>>>> 3ee9283 (Support calibrating kv cache scales)
             B_scale=self.weight_scale,
@@ -314,6 +315,8 @@ def forward(self, x):
         output = fp8_gemm(
             A=qinput,
             A_scale=self.input_scale,
+=======
+>>>>>>> def2049 (Fix weight name)
             B=self.weight,
             B_scale=self.weight_scale,
             bias=self.bias,
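fp8_gemm itself is outside this diff. As a reference for the keyword interface the call sites rely on (FP8 A and B with per-tensor scales, B laid out like torch.nn.Linear.weight), here is a hedged dequantize-and-matmul fallback; a production path would instead dispatch to a fused FP8 kernel such as torch._scaled_mm:

def fp8_gemm_reference(A, A_scale, B, B_scale, bias, out_dtype):
    # Dequantize both operands, then run a plain matmul.
    # B is (out_features, in_features), matching torch.nn.Linear.
    output = (A.to(out_dtype) * A_scale) @ (B.to(out_dtype) * B_scale).t()
    if bias is not None:
        output = output + bias
    return output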
@@ -353,11 +356,15 @@ def quantize_weights(
         quant_weight, weight_scale = per_tensor_quantize(linear.weight)
         bias = copy.deepcopy(linear.bias) if linear.bias is not None else None
         quant_linear = FP8DynamicLinear(
+<<<<<<< HEAD
 <<<<<<< HEAD
             weight=quant_weight, weight_scale=weight_scale, bias=bias
 =======
             qweight=quant_weight, weight_scale=weight_scale, bias=bias
 >>>>>>> 3ee9283 (Support calibrating kv cache scales)
+=======
+            weight=quant_weight, weight_scale=weight_scale, bias=bias
+>>>>>>> def2049 (Fix weight name)
         )
         replace_module(model, name, quant_linear)
         del linear.weight
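Resolved in favor of this commit, the conflicted call above collapses to FP8DynamicLinear(weight=quant_weight, weight_scale=weight_scale, bias=bias). The replace_module helper that follows it is likewise not shown; a minimal sketch of the usual pattern, assuming dotted names as produced by model.named_modules():

def replace_module(model: torch.nn.Module, name: str, new_module: torch.nn.Module):
    # Split a dotted path like "layers.0.mlp.down_proj" into parent path
    # and attribute name, then swap the child on its parent in place.
    if "." in name:
        parent_name, child_name = name.rsplit(".", 1)
        parent = model.get_submodule(parent_name)
    else:
        parent, child_name = model, name
    setattr(parent, child_name, new_module)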
@@ -379,11 +386,15 @@ def quantize_activations(
         ):
             continue
         quantizer = FP8StaticLinearQuantizer(
+<<<<<<< HEAD
 <<<<<<< HEAD
             weight=dynamic_quant_linear.weight,
 =======
             qweight=dynamic_quant_linear.qweight,
 >>>>>>> 3ee9283 (Support calibrating kv cache scales)
+=======
+            weight=dynamic_quant_linear.weight,
+>>>>>>> def2049 (Fix weight name)
             weight_scale=dynamic_quant_linear.weight_scale,
             bias=dynamic_quant_linear.bias,
             quantize_output=(
@@ -421,11 +432,15 @@ def quantize_activations(
         ):
             continue
         static_proj = FP8StaticLinear(
+<<<<<<< HEAD
 <<<<<<< HEAD
             weight=quantizer.weight,
 =======
             qweight=quantizer.qweight,
 >>>>>>> 3ee9283 (Support calibrating kv cache scales)
+=======
+            weight=quantizer.weight,
+>>>>>>> def2049 (Fix weight name)
             weight_scale=quantizer.weight_scale,
             bias=quantizer.bias,
             input_scale=quantizer.input_scale,
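Taken together, the diff's three modules form a two-stage pipeline: quantize_weights swaps each torch.nn.Linear for an FP8DynamicLinear, then quantize_activations wraps those in FP8StaticLinearQuantizer to calibrate input scales and finally freezes each into an FP8StaticLinear. The exact signatures are not visible here, so the driver below is a hedged sketch with assumed arguments:

quantize_weights(model)                     # Linear -> FP8DynamicLinear
quantize_activations(model, calib_batches)  # calibrate scales, freeze to FP8StaticLinear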