diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/__init__.py b/src/nncf/experimental/quantization/algorithms/weight_compression/__init__.py
new file mode 100644
index 00000000000..e5a42efc0ef
--- /dev/null
+++ b/src/nncf/experimental/quantization/algorithms/weight_compression/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py
new file mode 100644
index 00000000000..8ebdbb35134
--- /dev/null
+++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py
@@ -0,0 +1,144 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Iterable, Optional
+
+import torch
+
+import nncf
+from nncf import AdvancedCompressionParameters
+from nncf import CompressionFormat
+from nncf import Dataset
+from nncf import SensitivityMetric
+from nncf.common.graph.graph import NNCFGraph
+from nncf.common.graph.graph import NNCFNode
+from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
+from nncf.common.utils.backend import BackendType
+from nncf.experimental.quantization.quantizer import Quantizer
+from nncf.quantization.algorithms.algorithm import Algorithm
+from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression as OriginalWeightCompression
+from nncf import CompressWeightsMode
+from nncf import IgnoredScope
+from nncf import BackupMode
+
+class WeightsCompression(Algorithm):
+    """
+    Post-training Weight Compression algorithm implementation.
+
+    Compresses weights of Linear and Embedding layers to 8-bit integer or
+    to 4-bit integer/float depending on mode, ratio and group size.
+    """
+
+    def __init__(
+        self,
+        mode: CompressWeightsMode,
+        quantizer: Quantizer,
+        ratio: float,
+        group_size: int,
+        ignored_scope: IgnoredScope,
+        all_layers: bool,
+        subset_size: int,
+        awq: bool,
+        scale_estimation: bool,
+        gptq: bool,
+        lora_correction: bool,
+        backup_mode: BackupMode,
+        sensitivity_metric: SensitivityMetric,
+        compression_format: CompressionFormat,
+        advanced_parameters: AdvancedCompressionParameters,
+    ) -> None:
+        """
+        :param quantizer: Quantizer to use in WeightCompression algorithm.
+        :param subset_size: Number of data samples to calculate activation statistics used for assigning different
+            quantization precision.
+        :param awq: Determines whether to use the modified AWQ algorithm.
+        :param scale_estimation: Determines whether to use scale estimation for 4-bit layers.
+        :param gptq: Determines whether to use the GPTQ algorithm.
+        :param lora_correction: Determines whether to use the LoRA Correction algorithm.
+        :param sensitivity_metric: The sensitivity metric for assigning quantization precision to layers. In order to
+            preserve the accuracy of the model, the more sensitive layers receive a higher precision.
+        :param compression_format: Describes the format in which the model is saved after weight compression.
+        :param advanced_parameters: Advanced parameters for the algorithms in the compression pipeline.
+        """
+        self._quantizer = quantizer
+
+        self._mode = mode
+        self._awq = awq
+        self._gptq = gptq
+        self._scale_estimation = scale_estimation
+        self._subset_size = subset_size
+        self._advanced_parameters = advanced_parameters
+        self._lora_correction = lora_correction
+        self._ratio = ratio
+        self._group_size = group_size
+        self._all_layers = all_layers
+        self._backup_mode = backup_mode
+        self._sensitivity_metric = sensitivity_metric
+        self._compression_format = compression_format
+
+        self._algo = OriginalWeightCompression(
+            mode=self._mode,
+            ratio=self._ratio,
+            group_size=self._group_size,
+            ignored_scope=ignored_scope,
+            all_layers=self._all_layers,
+            sensitivity_metric=self._sensitivity_metric,
+            awq=self._awq,
+            subset_size=self._subset_size,
+            scale_estimation=self._scale_estimation,
+            gptq=self._gptq,
+            lora_correction=self._lora_correction,
+            backup_mode=self._backup_mode,
+            compression_format=self._compression_format,
+            advanced_parameters=self._advanced_parameters,
+        )
+
+    def available_backends(self) -> list[BackendType]:
+        return self._algo.available_backends()
+
+    def apply(
+        self,
+        model: torch.fx.GraphModule,
+        graph: NNCFGraph,
+        statistic_points: Optional[StatisticPointsContainer] = None,
+        dataset: Optional[Dataset] = None,
+    ) -> torch.fx.GraphModule:
+        self._algo.set_backend_entity(model)
+
+        all_weight_params, ratio_defining_params, skipped_weight_params = (
+            self._quantizer.get_weight_compression_parameters(model, graph)
+        )
+
+        return self._algo.apply_with_parameters(
+            model,
+            graph,
+            dataset,
+            statistic_points,
+            all_weight_params,
+            ratio_defining_params,
+            skipped_weight_params,
+        )
+
+    def get_statistic_points(
+        self,
+        model: torch.fx.GraphModule,
+        graph: NNCFGraph,
+        nodes_and_port_ids: Iterable[tuple[NNCFNode, int]],
+    ) -> StatisticPointsContainer:
+        """
+        Returns statistic points, for which StatisticsCollector should collect statistics.
+
+        :param model: Model for statistics collection.
+        :param graph: Model graph.
+        :param nodes_and_port_ids: Nodes and port ids for which statistics should be collected.
+        :return: Statistic points, for which StatisticsCollector should collect statistics.
+        """
+        return self._algo.get_statistic_points(model, graph, nodes_and_port_ids)
diff --git a/src/nncf/experimental/torch/fx/__init__.py b/src/nncf/experimental/torch/fx/__init__.py
index 2ecdde60840..86cd9709f6b 100644
--- a/src/nncf/experimental/torch/fx/__init__.py
+++ b/src/nncf/experimental/torch/fx/__init__.py
@@ -9,5 +9,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
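For reference, the experimental WeightsCompression algorithm above relies only on a narrow quantizer surface. The following sketch of that assumed contract is illustrative and not part of this patch (the concrete implementation is OpenVINOQuantizerAdapter further down; the return shape follows how WeightsCompression.apply() unpacks the result):

from typing import Any, Protocol

import torch.fx

from nncf.common.graph.graph import NNCFGraph
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters


class WeightCompressionQuantizer(Protocol):
    """Hypothetical summary of the quantizer interface used by WeightsCompression and compress_pt2e."""

    def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
        """Applies quantizer-specific graph transformations to the model before compression."""
        ...

    def get_weight_compression_parameters(
        self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph
    ) -> tuple[list[WeightCompressionParameters], ...]:
        """Unpacked by WeightsCompression.apply() as (all_weight_params, ratio_defining_params, skipped_weight_params)."""
        ...

    def get_weight_compression_config(self) -> dict[str, Any]:
        """Returns options such as mode, ratio, group_size, all_layers and backup_mode."""
        ...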
+from nncf.experimental.torch.fx.quantization.quantize_pt2e import compress_pt2e as compress_pt2e
 from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e as quantize_pt2e
 from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer as OpenVINOQuantizer
diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py
index 3f0b3186310..1a496eedb2d 100644
--- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py
+++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py
@@ -22,11 +22,14 @@
 from torch.fx.passes.infra.pass_manager import PassManager
 
 import nncf
+from nncf import AdvancedCompressionParameters
 from nncf import Dataset
+from nncf import SensitivityMetric
 from nncf.common.factory import NNCFGraphFactory
 from nncf.common.logging import nncf_logger
 from nncf.common.utils.api_marker import api
 from nncf.experimental.quantization.algorithms.post_training.algorithm import ExperimentalPostTrainingQuantization
+from nncf.experimental.quantization.algorithms.weight_compression.algorithm import WeightsCompression
 from nncf.experimental.torch.fx.constant_folding import constant_fold
 from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter
 from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer
@@ -34,6 +37,7 @@
 from nncf.experimental.torch.fx.transformations import QUANTIZE_NODE_TARGETS
 from nncf.experimental.torch.fx.transformations import DuplicateDQPassNoAnnotations
 from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation
+from nncf.quantization.algorithms.weight_compression.algorithm import get_weight_compression_configuration
 from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters
 from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
 from nncf.quantization.range_estimator import RangeEstimatorParameters
@@ -157,3 +161,91 @@ def _quant_node_constraint(n: torch.fx.Node) -> bool:
     related to quantization
     """
     return n.op == "call_function" and n.target in QUANTIZE_NODE_TARGETS
+
+
+@api(canonical_alias="nncf.experimental.torch.fx.compress_pt2e")
+def compress_pt2e(
+    model: torch.fx.GraphModule,
+    quantizer: Quantizer,
+    dataset: Optional[nncf.Dataset] = None,
+    awq: bool = False,
+    scale_estimation: bool = False,
+    gptq: bool = False,
+    lora_correction: bool = False,
+    subset_size: int = 128,  # Number of calibration samples to use
+    sensitivity_metric: Optional[SensitivityMetric] = None,
+    advanced_parameters: Optional[AdvancedCompressionParameters] = None,
+) -> torch.fx.GraphModule:
+    """
+    Applies Weight Compression to the provided torch.fx.GraphModule model
+    using the provided torch.ao quantizer.
+
+    :param model: A torch.fx.GraphModule instance to be quantized.
+    :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups
+        to convey the desired way of quantization.
+    :param dataset: A representative dataset for the calibration process.
+    :param awq: Determines whether to use the modified AWQ algorithm.
+    :param scale_estimation: Determines whether to use scale estimation for 4-bit layers.
+    :param gptq: Determines whether to use the GPTQ algorithm.
+    :param lora_correction: Determines whether to use the LoRA Correction algorithm.
+    :param subset_size: Number of data samples to calculate activation statistics used for assigning different
+        quantization precision.
+    :param sensitivity_metric: The sensitivity metric for assigning quantization precision to layers. In order to
+        preserve the accuracy of the model, the more sensitive layers receive a higher precision.
+    :param advanced_parameters: Advanced parameters for algorithms in the compression pipeline.
+    """
+    if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_parameters"):
+        quantizer = OpenVINOQuantizerAdapter(quantizer)
+        compression_format = nncf.CompressionFormat.DQ
+    else:
+        # TODO: Support third-party quantizers here.
+        msg = "Only OpenVINO Quantizer is supported currently."
+        raise nncf.InternalError(msg)
+
+    wc_config = quantizer.get_weight_compression_config()
+
+    mode = wc_config.get("mode", None)
+    ratio = wc_config.get("ratio", 1)
+    group_size = wc_config.get("group_size", 128)
+    all_layers = wc_config.get("all_layers", False)
+    backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM)
+    ignored_scope = nncf.IgnoredScope()  # The ignored scope is already handled inside the quantizer object
+
+    weight_compression_configuration = get_weight_compression_configuration(
+        mode,
+        dataset,
+        ratio,
+        group_size,
+        all_layers,
+        awq,
+        scale_estimation,
+        gptq,
+        lora_correction,
+        ignored_scope,
+        sensitivity_metric,
+        backup_mode,
+        advanced_parameters,
+    )
+
+    quantization_algorithm = WeightsCompression(
+        quantizer=quantizer,
+        subset_size=subset_size,
+        compression_format=compression_format,
+        **weight_compression_configuration
+    )
+
+    # Here the model is annotated by the quantizer
+    transformed_model = quantizer.transform_prior_quantization(model)
+    nncf_graph = NNCFGraphFactory.create(transformed_model)
+    quantized_model = quantization_algorithm.apply(transformed_model, nncf_graph, dataset=dataset)
+    quantized_model = torch.fx.GraphModule(quantized_model, graph=quantized_model.graph)
+    return quantized_model
diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py
index 2283d9d9dbb..b72df9d29f7 100644
--- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py
+++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py
@@ -9,12 +9,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any
+
 import torch.fx
 
 from nncf.common.graph.graph import NNCFGraph
 from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup
 from nncf.experimental.quantization.quantizer import Quantizer
 from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer
+from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
 
 
 class OpenVINOQuantizerAdapter(Quantizer):
@@ -30,3 +33,18 @@ def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx.
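To make the new compress_pt2e entry point above concrete, here is a minimal usage sketch with the ExecuTorch OpenVINOQuantizer. It is illustrative only: the quantizer arguments and import path mirror the tests later in this patch, torch.export.export(...).module() is shown as one way to obtain a torch.fx.GraphModule (the tests use a get_torch_fx_model helper instead), and Model/example_input are placeholders.

import torch
from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer, QuantizationMode

import nncf
from nncf.experimental.torch.fx import compress_pt2e

# Placeholders: any eager model and a representative example input.
model = Model().eval()
example_input = torch.ones(1, 3, 64)

# Capture the model to torch.fx before compression.
fx_model = torch.export.export(model, (example_input,)).module()

# INT4 weight-only compression with a mixed-precision ratio, as in the test matrix.
quantizer = OpenVINOQuantizer(mode=QuantizationMode.INT4WO_SYM, group_size=32, ratio=0.8)
calibration_dataset = nncf.Dataset([example_input])

compressed_model = compress_pt2e(
    fx_model,
    quantizer=quantizer,
    dataset=calibration_dataset,
    sensitivity_metric=nncf.SensitivityMetric.MAX_ACTIVATION_VARIANCE,
)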
def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: return self._quantizer.get_nncf_quantization_setup(model, nncf_graph) + + def get_weight_compression_parameters( + self, + model: torch.fx.GraphModule, + nncf_graph: NNCFGraph, + ) -> tuple[ + list[WeightCompressionParameters], + list[WeightCompressionParameters], + dict[str, int], + list[WeightCompressionParameters], + ]: + return self._quantizer.get_nncf_weight_compression_parameters(model, nncf_graph) + + def get_weight_compression_config(self) -> dict[str, Any]: + return self._quantizer.weight_compression_configuration diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py index e3a6c1c8f42..5ae585981c5 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py @@ -211,4 +211,4 @@ def _unwrap_shared_qspec_safe(qspec: QuantizationSpec, edge_or_node_to_qspec: di if i == MAX_DEPTH: msg = f"Shared qspecs referenced to each other more than the limit: {MAX_DEPTH}" raise RuntimeError(msg) - return qspec + return qspec \ No newline at end of file diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index d0407a1eff4..ddab9b261ef 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -102,7 +102,7 @@ def get_weight_compression_configuration( ) return { - "mode": mode, + "mode": mode if isinstance(mode, nncf.CompressWeightsMode) else nncf.CompressWeightsMode(mode), "ratio": ratio or 1, "group_size": group_size, "all_layers": all_layers or False, @@ -491,6 +491,23 @@ def _get_ratio_defining_params( return ratio_defining_params + def _get_backup_config(self, weight_dtype: TensorDataType) -> WeightCompressionConfig: + """ + Returns the backup weight compression configuration based on the algorithm's backup mode. + + :param weight_dtype: Data type of the weight tensor. + :return: A WeightCompressionConfig object for the backup precision, or None if backup is + disabled or unsupported. + """ + if self._backup_mode == BackupMode.NONE: + return None + mode = ( + CompressWeightsMode.INT8_ASYM if self._backup_mode == BackupMode.INT8_ASYM else CompressWeightsMode.INT8_SYM + ) + if not self.is_weight_compression_supported(weight_dtype, mode): + return None + return WeightCompressionConfig(mode=mode) + def _get_primary_config(self, group_size: int) -> WeightCompressionConfig: codebook_values = None @@ -511,7 +528,6 @@ def _set_weight_compression_config( model: TModel, graph: NNCFGraph, statistics_points: StatisticPointsContainer, - group_size_values: dict[str, int], ) -> None: """ Sets the appropriate compression configuration for weights based on some criteria. @@ -527,12 +543,12 @@ def _set_weight_compression_config( primary_precision_weight_params = self._mixed_precision_algo.apply( model, graph, statistics_points, weight_params=ratio_defining_params ) - else: - primary_precision_weight_params = ratio_defining_params - - for weight_param in primary_precision_weight_params: - weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) - + # ratio_defining_params are all in primary precision. 
Update parameters + # which need to be set to backup precision + for weight_param in ratio_defining_params: + if weight_param in primary_precision_weight_params: + continue + weight_param.compression_config = self._get_backup_config(weight_param.weight_dtype) # Check if group size is valid for each weight in ratio_defining_params failed_nodes = [] for w_params in ratio_defining_params: @@ -769,27 +785,65 @@ def is_weight_compression_supported( return is_supported_dtype and not no_bit_reduction + def _collect_statistics_and_statistic_points( + self, + model: TModel, + graph: NNCFGraph, + statistic_points: StatisticPointsContainer, + dataset: Dataset, + ratio_defining_params: list[WeightCompressionParameters], + all_weight_params: list[WeightCompressionParameters], + ) -> tuple[dict[str, WCTensorStatistic], StatisticPointsContainer]: + """ + Collects and computes statistics required for weight compression. + + :param model: Backend-specific model instance. + :param graph: Corresponding NNCFGraph of the model. + :param statistic_points: Container with pre-collected statistics, if available. + :param dataset: Dataset used for collecting statistics when not provided. + :param ratio_defining_params: List of parameters defining compression ratios. + :param all_weight_params: List of all weight compression parameters. + :return: A tuple containing collected statistics for weight compression and the updated statistic_points. + """ + if not dataset or not (self._data_aware_mixed_precision or self._data_aware_compression): + return None, statistic_points + weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params + matmul_nodes_to_compress = [ + wp.node_with_weight + for wp in weight_params + if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes + ] + matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) + if statistic_points is None: + statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) + statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) + statistics = self._get_statistics_for_weights_compression( + matmul_input_to_output_nodes_map, statistic_points + ) + return statistics, statistic_points + def get_weight_compression_parameters( self, model: TModel, graph: NNCFGraph, - statistic_points: Optional[StatisticPointsContainer] = None, - dataset: Optional[Dataset] = None, - ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: + ) -> tuple[ + list[WeightCompressionParameters], + list[WeightCompressionParameters], + list[WeightCompressionParameters], + ]: """ Generates a list of weight compression parameters based on the Weight Compression algorithm configuration. Determines the appropriate quantization parameters for each node eligible for - weight compression. Also, Generates a mapping of target node names to the collected statistics - based on the provided statistic_points. If statistic_points is None, collects required - compression statistics on the given dataset. + weight compression. Also, returns a list of ratio defining parameters which are a subset of + all_weight_parameters. This is based on parameters like all_layers. Lastly, it gives a list + of skipped layers based on parameters like ignored scope or depending on the group size value + adjustment. :param model: Backend-specific input model. :param graph: NNCFGraph instance. 
- :param statistic_points: Optional pre-collected statistic points. - :param dataset: Optional dataset for statistics collection. - :return: A tuple consisting of a list of weight compression parameters, based on the Weight - Compression algorithm configuration, and a mapping of target node names to the - collected statistics. + :return: A tuple consisting of a list of all weight compression parameters, based on the Weight + Compression algorithm configuration, list of ratio defining parameters(weights that are used + for ratio calculation between primary and backup precisions), and list of weight parameters to skip. """ nodes_to_compress = self.get_nodes_to_compress(graph) @@ -814,8 +868,8 @@ def get_weight_compression_parameters( weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph) weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph) reduction_axes = self._backend_entity.get_reduction_axes(node, weight_port_id, graph) - wc_config = None + if is_target_node and self.is_weight_compression_supported(weight_dtype, self._mode): if ( self._group_size != -1 @@ -834,14 +888,7 @@ def get_weight_compression_parameters( f"node name: {node.node_name}. The node will be in {self._backup_mode} mode." ) - if self._backup_mode != BackupMode.NONE: - mode = ( - CompressWeightsMode.INT8_ASYM - if self._backup_mode == BackupMode.INT8_ASYM - else CompressWeightsMode.INT8_SYM - ) - if self.is_weight_compression_supported(weight_dtype, mode): - wc_config = WeightCompressionConfig(mode=mode) + wc_config = self._get_backup_config(weight_dtype) weight_params = WeightCompressionParameters( weight_name, node, weight_port_id, weight_dtype, weight_shape, reduction_axes, wc_config @@ -869,37 +916,11 @@ def get_weight_compression_parameters( else: group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} - # Collect statistics for the weights compression - statistics = None - if (self._data_aware_mixed_precision or self._data_aware_compression) and dataset: - weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params - matmul_nodes_to_compress = [ - wp.node_with_weight - for wp in weight_params - if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes - ] - matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map( - matmul_nodes_to_compress, graph - ) - if statistic_points is None: - statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) - statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - statistics = self._get_statistics_for_weights_compression( - matmul_input_to_output_nodes_map, statistic_points - ) - - # Set weight compression configuration - self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) - - # Print statistics - nncf_logger.info( - self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) - ) - - # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision - all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) + # Set these layers to primary config. 
Later we will set layers to backup precision according to Mixed precision + for weight_param in ratio_defining_params: + weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) - return all_weight_params, statistics + return all_weight_params, ratio_defining_params, skipped_weight_params def apply( self, @@ -911,7 +932,44 @@ def apply( self.set_backend_entity(model) # Get processed weight compression parameters ready for compression - all_weight_params, statistics = self.get_weight_compression_parameters(model, graph, statistic_points, dataset) + all_weight_params, ratio_defining_params, skipped_weight_params = ( + self.get_weight_compression_parameters(model, graph) + ) + return self.apply_with_parameters( + model, + graph, + dataset, + statistic_points, + all_weight_params, + ratio_defining_params, + skipped_weight_params, + ) + + def apply_with_parameters( + self, + model: TModel, + graph: NNCFGraph, + dataset: Dataset, + statistic_points: StatisticPointsContainer, + all_weight_params: list[WeightCompressionParameters], + ratio_defining_params: list[WeightCompressionParameters], + skipped_weight_params: list[WeightCompressionParameters], + ): + # Collect statistics for the weights compression + statistics, statistic_points = self._collect_statistics_and_statistic_points( + model, graph, statistic_points, dataset, ratio_defining_params, all_weight_params + ) + # Set weight compression configuration + self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points) + + # Print statistics + nncf_logger.info( + self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) + ) + + # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision + all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) + if self._awq: model = self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) diff --git a/tests/executorch/test_quantizer.py b/tests/executorch/test_quantizer.py new file mode 100644 index 00000000000..9d2b68b2ba4 --- /dev/null +++ b/tests/executorch/test_quantizer.py @@ -0,0 +1,259 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); + +from dataclasses import dataclass +from functools import partial +from typing import Any, Callable, Optional + +import dataclasses +import json +from enum import Enum + +import pytest +import torch +import torch.fx + +import nncf +from nncf.common.graph import NNCFGraph +from nncf.common.utils.os import safe_open +from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter +from nncf.experimental.torch.fx import compress_pt2e + +from tests.cross_fw.shared.nx_graph import compare_nx_graph_with_reference +from tests.cross_fw.shared.paths import TEST_ROOT +from tests.torch.test_models.synthetic import ShortTransformer +from tests.torch.test_models.llama import LlamaDecoderOnly +from tests.torch2.fx.helpers import get_torch_fx_model + +from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e, convert_pt2e + +from executorch.backends.openvino.quantizer.quantizer import ( + OpenVINOQuantizer, + QuantizationMode, +) +from nncf.common.graph.graph import NNCFNode + +FX_PT2E_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "compress_pt2e" +FX_AO_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "ao_compression_OpenVINOQuantizer" + + +@dataclass +class 
ModelCase: + model_builder: Callable[[], torch.nn.Module] + model_id: str + input_shape: tuple[int, ...] + + +def get_dot_filename(model_name: str) -> str: + return model_name + ".dot" + + +def get_wc_param_filename(model_name: str) -> str: + return model_name + "_ref_wc_param.json" + + +def _build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, torch.Tensor]: + model = model_case.model_builder() + # ShortTransformer takes token ids; match prior synthetic tests (int32) + example_input = torch.ones(model_case.input_shape, dtype=torch.int32) + fx_model = get_torch_fx_model(model, example_input) + return fx_model, example_input + + +def _get_calibration_dataset(example_input: torch.Tensor) -> nncf.Dataset: + torch.manual_seed(42) + def transform_fn(x): + return x.to("cpu") + sample_1 = torch.randint_like(example_input, 0,10) + sample_2 = torch.randint_like(example_input, 0, 10) + return nncf.Dataset([example_input, sample_1, sample_2], transform_fn) + + +def get_openvino_quantizer(*args, **kwargs) -> OpenVINOQuantizer: + return OpenVINOQuantizer(*args, **kwargs) + + +def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[dict[str, Any]] = None) -> str: + mode = qparams.get("mode") + gs = qparams.get("group_size", "-1") + ratio = qparams.get("ratio", "1") + all_layers = qparams.get("all_layers", "False") + if(pt2e_param is None): + return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}" + sensitivity_metric = pt2e_param.get("sensitivity_metric", "None") + return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}_sensitivity_metric_{sensitivity_metric}" + + +BASE_MODELS = ( + ModelCase(LlamaDecoderOnly, "LlamaDecoderOnly", [1,3,64]), + ModelCase(partial(ShortTransformer, 64, 128, True), "short_transformer_shared", [5]), +) + +QUANTIZER_PARAMS = ( + {"mode": QuantizationMode.INT8WO_ASYM}, + {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32, "ratio": 0.8}, + {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32, "ratio": 0.8, "all_layers": True}, +) + +PT2E_PARAMS = ( + {"sensitivity_metric": nncf.SensitivityMetric.HESSIAN_INPUT_ACTIVATION}, + {"sensitivity_metric": nncf.SensitivityMetric.MAX_ACTIVATION_VARIANCE}, + {"sensitivity_metric": nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR}, + {"sensitivity_metric": nncf.SensitivityMetric.MEAN_ACTIVATION_VARIANCE}, + {"sensitivity_metric": nncf.SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE}, +) + + +TEST_MODELS = tuple( + (model, qparam, pt2e_param) + for model in BASE_MODELS + for qparam in QUANTIZER_PARAMS + for pt2e_param in ( + [{}] + if ( + (qparam.get("mode") in {QuantizationMode.INT8WO_ASYM, QuantizationMode.INT8WO_SYM}) + or (qparam.get("ratio") is None) + ) + else PT2E_PARAMS + ) +) + + +TEST_MODEL_IDS = [ + f"{m.model_id}__{_string_from_quantizer_params(qparams, pt2e_param)}" for (m, qparams, pt2e_param) in TEST_MODELS +] + + +@pytest.mark.parametrize( + ("model_case", "quantizer_params", "pt2e_params"), + TEST_MODELS, + ids=TEST_MODEL_IDS, +) + +@pytest.mark.parametrize( + "quantizer_builder", + [get_openvino_quantizer], + ids=["OpenVINOQuantizer"], +) + +def test_compress_pt2e( + quantizer_builder: Callable[..., OpenVINOQuantizer], + model_case: ModelCase, + quantizer_params, + pt2e_params, +): + fx_model, example_input = _build_torch_fx_model(model_case) + with torch.no_grad(): + ref_out = fx_model(example_input) + + calibration_dataset = _get_calibration_dataset(example_input) + + # Build quantizer directly from quantizer_params (already includes mode/group_size) + 
quantizer = quantizer_builder(**quantizer_params) + + quantized_model = compress_pt2e( + fx_model, + quantizer=quantizer, + dataset=calibration_dataset, + **pt2e_params, + ) + + with torch.no_grad(): + out = quantized_model(example_input) + assert out.shape == ref_out.shape, "Compressed model output shape mismatch." + + nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(quantized_model) + nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) + param_string = _string_from_quantizer_params(quantizer_params, pt2e_params) + path_to_dot = (FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_dot_filename(param_string)).as_posix() + compare_nx_graph_with_reference(nx_graph, path_to_dot) + + +@pytest.mark.parametrize( + ("model_case", "quantizer_params", "pt2e_params"), + TEST_MODELS, + ids=TEST_MODEL_IDS, +) +@pytest.mark.parametrize( + "quantizer_builder", + [get_openvino_quantizer], + ids=["OpenVINOQuantizer"], +) +def test_openvino_quantizer( + model_case: ModelCase, + quantizer_params, + quantizer_builder: Callable[..., OpenVINOQuantizer], + pt2e_params, +): + fx_model, example_input = _build_torch_fx_model(model_case) + quantizer = quantizer_builder(**quantizer_params) + + prepared = prepare_pt2e(fx_model, quantizer) + prepared(example_input) + ao_quantized_model = convert_pt2e(prepared) + + nncf_graph = GraphConverter.create_nncf_graph(ao_quantized_model) + nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) + + param_string = _string_from_quantizer_params(quantizer_params) + path_to_dot = (FX_AO_DIR / model_case.model_id / get_dot_filename(param_string)).as_posix() + compare_nx_graph_with_reference(nx_graph, path_to_dot) + + +def _serialize_wc_param(wp) -> dict[str, Any]: + def to_json_serializable(obj): + if dataclasses.is_dataclass(obj): + return {k: to_json_serializable(v) for k, v in dataclasses.asdict(obj).items()} + elif isinstance(obj, Enum): + return obj.value + elif isinstance(obj, (list, tuple)): + return [to_json_serializable(x) for x in obj] + elif isinstance(obj, dict): + return {k: to_json_serializable(v) for k, v in obj.items()} + elif isinstance(obj, NNCFNode): + return obj.node_name + else: + return obj + + return to_json_serializable(wp) + +@pytest.mark.parametrize( + ("model_case", "quantizer_params", "pt2e_params"), + TEST_MODELS, + ids=TEST_MODEL_IDS, +) +@pytest.mark.parametrize( + "quantizer_builder", + [get_openvino_quantizer], + ids=["OpenVINOQuantizer"], +) +def test_openvino_wc_params( + quantizer_builder: Callable[..., OpenVINOQuantizer], + model_case: ModelCase, + quantizer_params, + pt2e_params, + regen_ref_data=False, +): + fx_model, _ = _build_torch_fx_model(model_case) + nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(fx_model) + + param_string = _string_from_quantizer_params(quantizer_params) + quantizer = quantizer_builder(**quantizer_params) + + all_weight_params, *_ = quantizer.get_nncf_weight_compression_parameters(fx_model, nncf_graph) + + wc_params = _serialize_wc_param(all_weight_params) + + ref_json_path = (FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_wc_param_filename(param_string)) + + if regen_ref_data: + with safe_open(ref_json_path, "w") as file: + json.dump(wc_params, file, indent=4) + + with safe_open(ref_json_path, "r") as f: + ref_data = json.load(f) + + assert wc_params == ref_data, ( + f"Weight compression parameters JSON mismatch for {model_case.model_id} ({param_string}).\n" + f"Ref: {ref_json_path}" + ) diff --git 
a/tests/torch/test_models/__init__.py b/tests/torch/test_models/__init__.py index 95cba87cc98..f412372978b 100644 --- a/tests/torch/test_models/__init__.py +++ b/tests/torch/test_models/__init__.py @@ -26,3 +26,4 @@ from .sr_small_model import * from .unet import * from .vgg import * +from .llama import * diff --git a/tests/torch/test_models/llama.py b/tests/torch/test_models/llama.py new file mode 100644 index 00000000000..fbf4df50e21 --- /dev/null +++ b/tests/torch/test_models/llama.py @@ -0,0 +1,151 @@ +import math +from typing import Optional, Tuple +import torch +import torch.nn as nn +import torch.nn.functional as F + +EMBED_DIM = 64 +N_HEADS = 4 +HEAD_DIM = EMBED_DIM // N_HEADS +# Same as Llama 3.2 config +ROPE_THETA = 500000.0 +MAX_SEQ = 128 +BIAS = False + + +class LlamaRMSNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-6): + """ + Copied from src/transformers/models/llama/modeling_llama.py + LlamaRMSNorm is equivalent to T5LayerNorm + """ + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) + +def _rotate_half(x): + """ + Copied from src/transformers/models/llama/modeling_llama.py + Rotates half the hidden dims of the input. + """ + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + +class Rotary(nn.Module): + """ + Precompute cos/sin for RoPE and apply to q,k. + Copied from src/transformers/models/llama/modeling_llama.py + Initialize the cos and sin value once in init method + """ + # Llama applies rotary to q,k before attention; see modeling_llama + def __init__(self, head_dim: int, max_seq_len: int = MAX_SEQ, theta: float = ROPE_THETA, device=None): + super().__init__() + dtype = torch.float32 + inv_freq = 1.0 / (theta ** (torch.arange(0, head_dim, 2, dtype=dtype, device=device) / head_dim)) + t = torch.arange(max_seq_len, dtype=dtype, device=device) + freqs = torch.einsum("t,f->tf", t, inv_freq) # (T, Hd/2) + emb = torch.cat((freqs, freqs), dim=-1) # (T, Hd) + self.register_buffer("cos", emb.cos()[None, None, ...], persistent=False) # (1,1,T,Hd) + self.register_buffer("sin", emb.sin()[None, None, ...], persistent=False) + def forward(self, q: torch.Tensor, k: torch.Tensor, pos: torch.Tensor): + cos = self.cos[..., pos, :] + sin = self.sin[..., pos, :] + q_embed = (q * cos) + (_rotate_half(q) * sin) + k_embed = (k * cos) + (_rotate_half(k) * sin) + return q_embed, k_embed + +class LlamaMLP(nn.Module): + """ + Copied from src/transformers/models/llama/modeling_llama.py + """ + def __init__(self, dim: int, mult: int = 2): + super().__init__() + # mult is used as a scaling factor of sorts. This is to define the hidden/intermediate layer size + hidden = mult * dim + self.gate_proj = nn.Linear(dim, hidden, bias=BIAS) + self.up_proj = nn.Linear(dim, hidden, bias=BIAS) + self.down_proj = nn.Linear(hidden, dim, bias=BIAS) + def forward(self, x: torch.Tensor) -> torch.Tensor: + down_proj = self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) + return down_proj + +class LlamaDecoderOnly(nn.Module): + """ + One Llama-style transformer block (pre-norm attn + MLP) with RoPE and KV cache. + Forward takes embeddings only. 
+ """ + # KV caching + past_key_values flow mirrors HF implementations. :contentReference[oaicite:4]{index=4} + def __init__(self, dim: int = EMBED_DIM, n_heads: int = N_HEADS): + super().__init__() + assert dim % n_heads == 0 + self.n_heads = n_heads + self.head_dim = dim // n_heads + self.scale = 1.0 / math.sqrt(self.head_dim) + + self.attn_norm = LlamaRMSNorm(dim) + self.q_proj = nn.Linear(dim, dim, bias=BIAS) + self.k_proj = nn.Linear(dim, dim, bias=BIAS) + self.v_proj = nn.Linear(dim, dim, bias=BIAS) + self.o_proj = nn.Linear(dim, dim, bias=BIAS) + self.rope = Rotary(self.head_dim, MAX_SEQ, theta=ROPE_THETA) + + self.mlp_norm = LlamaRMSNorm(dim) + self.mlp = LlamaMLP(dim) + + def _attn(self, x: torch.Tensor, pos: torch.Tensor, past_kv: Optional[Tuple[torch.Tensor, torch.Tensor]]): + ''' + Code from LlamaAttention forward method. SDPA implementation similar to model.config._attn_implementation="SDPA" + ''' + B, T, C = x.shape + H, Hd = self.n_heads, self.head_dim + + # QKV projections from hidden state x + q = self.q_proj(x).view(B, T, H, Hd).transpose(1, 2) + k = self.k_proj(x).view(B, T, H, Hd).transpose(1, 2) + v = self.v_proj(x).view(B, T, H, Hd).transpose(1, 2) + + # RoPE + q, k = self.rope(q, k, pos) + + # KV cache + if past_kv is not None: + pk, pv = past_kv # (B,H,Tpast,Hd) + k = torch.cat([pk, k], dim=2) + v = torch.cat([pv, v], dim=2) + + y = torch.nn.functional.scaled_dot_product_attention( + q, k, v, + attn_mask=None, + is_causal=True, + dropout_p=0.0 + ) + + y = y.transpose(1, 2).contiguous().view(B, T, C) + y = self.o_proj(y) + return y, (k, v) + + def forward( + self, + x_embed: torch.Tensor, # (B, T_new, C) embeddings only + past_kv: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # (B,H,Tpast,Hd) + ): + # positions for the *new* tokens only + past_len = 0 if past_kv is None else past_kv[0].size(2) + T_new = x_embed.size(1) + pos = torch.arange(past_len, past_len + T_new, device=x_embed.device) + + # pre-norm attention + residual + y, kv = self._attn(self.attn_norm(x_embed), pos, past_kv) + x = x_embed + y + + # pre-norm MLP + residual + x = x + self.mlp(self.mlp_norm(x)) + return x diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot new file mode 100644 index 00000000000..0a9a27fd85b --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 
symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; +"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; +"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 k_proj_weight_updated_constant0" -> "5 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; 
+"10 mlp_gate_proj_weight_updated_constant0" -> "11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 
scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot new file mode 100644 index 00000000000..254abcb9dc0 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" 
[id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; +"15 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; +"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; +"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 k_proj_weight_updated_constant0" -> "5 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; +"10 mlp_gate_proj_weight_updated_constant0" -> "11 
symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"15 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" 
-> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot new file mode 100644 index 00000000000..614e06a21ac --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 asymmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 asymmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 asymmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" 
[id=14, type="get_attr"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; +"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; +"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"3 asymmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 k_proj_weight_updated_constant0" -> "5 asymmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(64, 64)"]; +"5 asymmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; +"9 asymmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; +"10 mlp_gate_proj_weight_updated_constant0" -> "11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, 
label="(128, 64)"]; +"11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 
slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot new file mode 100644 index 00000000000..2841824b5a3 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 symmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; +"1 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 
asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot new file mode 100644 index 00000000000..0382f7e5934 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 symmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; +"1 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot new file mode 100644 index 00000000000..03fc9e9c6a0 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 asymmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"1 
asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..7cfdf2719df --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { + "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 
mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" 
-> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 
64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, 
type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" 
[style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 
linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 
symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 
4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 
asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 
mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 
transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 
asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot new file mode 100644 index 00000000000..c62d8eb460e --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, 
type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, 
label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, 
label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json new file mode 100644 index 00000000000..e1baa81d0dc --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { + "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot new file mode 100644 index 00000000000..31fb9463c88 --- /dev/null +++ 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 
add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 
16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot new file mode 
100644 index 00000000000..31fb9463c88 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, 
type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 
8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot new file mode 100644 index 00000000000..31fb9463c88 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 
mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" 
-> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot new file mode 100644 index 00000000000..31fb9463c88 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 
mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> 
"38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 
64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot new file mode 100644 index 00000000000..99c2d53d916 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" 
[id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 
mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; 
+"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..69d4cf0f6a8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { + "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot new file mode 100644 index 00000000000..29de7b02841 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" 
[id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 asymmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 asymmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 
64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 asymmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(64, 64)"]; +"22 asymmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; +"27 asymmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 
4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..fd8fbda6f54 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + 
}, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, 
type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; 
+"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json new file mode 100644 index 00000000000..81205ac2ca8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 
1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 
symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..49d45c1fffb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 
linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +}