Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
554 changes: 154 additions & 400 deletions auto_round/compressors/base.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion auto_round/export/export_to_autoround/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .export import save_quantized_as_autoround, AutoRoundFormat
from .export import save_quantized_as_autoround, AutoRoundExportFormat
8 changes: 4 additions & 4 deletions auto_round/export/export_to_autoround/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
)


class AutoRoundFormat(str, Enum):
class AutoRoundExportFormat(str, Enum):
# Weight: FP8, per-channel, may be extended to per-tensor in future
# Activation: FP8, per-tensor
FP8_STATIC = "fp8_static"
Expand Down Expand Up @@ -165,8 +165,8 @@ def pack_layer(layer_name, model, backend, device=None):
return pack_layer(layer_name, model, backend, device)

if (
backend == f"auto_round:{AutoRoundFormat.FP8.value}"
or backend == f"auto_round:{AutoRoundFormat.FP8_STATIC.value}"
backend == f"auto_round:{AutoRoundExportFormat.FP8.value}"
or backend == f"auto_round:{AutoRoundExportFormat.FP8_STATIC.value}"
):
from auto_round.export.export_to_autoround.export_to_fp8 import pack_layer

Expand Down Expand Up @@ -298,7 +298,7 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round:ex
if (
(kwargs.get("sym") is None or kwargs.get("sym"))
and ("gptq" not in backend and "awq" not in backend)
and (AutoRoundFormat.FP8_STATIC.value not in backend)
and (AutoRoundExportFormat.FP8_STATIC.value not in backend)
):
backend = backend.replace("auto_round", "auto_round:auto_gptq")

Expand Down
Loading
Loading