Add ignored pattern for Positional Embedding from Segment Anything model (#3700)

nikita-savelyevv · web-flow · commit 208b2efa8f1d · 2025-10-28T10:16:40.000+02:00
### Changes
As in the title.
Example model: https://huggingface.co/facebook/sam-vit-base
Code: https://github.com/facebookresearch/segment-anything/blob/dca509fe793f601edb92606367a655c15ac00fdf/segment_anything/modeling/prompt_encoder.py#L171

### Reason for changes
To automatically ignore the positional embedding weight during weight compression.

<img width="230" height="471" alt="image" src="https://github.com/user-attachments/assets/f0ff351b-a426-4076-b339-d883a04b4451" />

### Related tickets
175083

### Tests
Added tests/cross_fw/test_templates/template_test_weights_compression.py::test_sam_pe_weight_compression
diff --git a/src/nncf/common/graph/patterns/patterns.py b/src/nncf/common/graph/patterns/patterns.py
@@ -408,3 +408,4 @@ class IgnoredPatternNames(Enum):
     FC_BN_HSWISH_ACTIVATION = PatternDesc("fc_bn_hswish_activation")
     EQUAL_LOGICALNOT = PatternDesc("equal_logicalnot")
     ROPE = PatternDesc("rope", model_types=[ModelType.TRANSFORMER])
+    SAM_PE = PatternDesc("sam_pe", model_types=[ModelType.TRANSFORMER])
diff --git a/src/nncf/onnx/quantization/ignored_patterns.py b/src/nncf/onnx/quantization/ignored_patterns.py
@@ -179,3 +179,29 @@ def create_rope() -> GraphPattern:
     pattern.add_edge(concat_node, cos_node)
     pattern.add_edge(concat_node, sin_node)
     return pattern
+
+
+@ONNX_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
+def create_sam_pe() -> GraphPattern:
+    """
+    Positional Embedding from Segment Anything Model (SAM): MatMul -> Mul -> (Cos, Sin) -> Concat.
+    """
+    pattern = GraphPattern()
+    # One pattern node per operation of the matched subgraph.
+    matmul_node = pattern.add_node(
+        **{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: om.ONNXMatMulMetatype}
+    )
+    mul_node = pattern.add_node(
+        **{GraphPattern.LABEL_ATTR: "MULTIPLY", GraphPattern.METATYPE_ATTR: om.ONNXMulLayerMetatype}
+    )
+    cos_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "COS", GraphPattern.METATYPE_ATTR: om.ONNXCosMetatype})
+    sin_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SIN", GraphPattern.METATYPE_ATTR: om.ONNXSinMetatype})
+    concat = pattern.add_node(**{GraphPattern.LABEL_ATTR: "CONCAT", GraphPattern.METATYPE_ATTR: om.ONNXConcatMetatype})
+    # MatMul feeds Mul; Mul fans out to Cos and Sin; both branches are merged by Concat.
+    pattern.add_edge(matmul_node, mul_node)
+    pattern.add_edge(mul_node, cos_node)
+    pattern.add_edge(mul_node, sin_node)
+    pattern.add_edge(cos_node, concat)
+    pattern.add_edge(sin_node, concat)
+
+    return pattern
diff --git a/src/nncf/openvino/quantization/ignored_patterns.py b/src/nncf/openvino/quantization/ignored_patterns.py
@@ -186,3 +186,29 @@ def create_rope() -> GraphPattern:
     pattern.add_edge(concat_node, cos_node)
     pattern.add_edge(concat_node, sin_node)
     return pattern
+
+
+@OPENVINO_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
+def create_sam_pe() -> GraphPattern:
+    """
+    Positional Embedding from Segment Anything Model (SAM): MatMul -> Multiply -> (Cos, Sin) -> Concat.
+    """
+    pattern = GraphPattern()
+    # One pattern node per operation of the matched subgraph.
+    matmul_node = pattern.add_node(
+        **{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: om.OVMatMulMetatype}
+    )
+    mul_node = pattern.add_node(
+        **{GraphPattern.LABEL_ATTR: "MULTIPLY", GraphPattern.METATYPE_ATTR: om.OVMultiplyMetatype}
+    )
+    cos_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "COS", GraphPattern.METATYPE_ATTR: om.OVCosMetatype})
+    sin_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SIN", GraphPattern.METATYPE_ATTR: om.OVSinMetatype})
+    concat = pattern.add_node(**{GraphPattern.LABEL_ATTR: "CONCAT", GraphPattern.METATYPE_ATTR: om.OVConcatMetatype})
+    # MatMul feeds Multiply; Multiply fans out to Cos and Sin; both branches are merged by Concat.
+    pattern.add_edge(matmul_node, mul_node)
+    pattern.add_edge(mul_node, cos_node)
+    pattern.add_edge(mul_node, sin_node)
+    pattern.add_edge(cos_node, concat)
+    pattern.add_edge(sin_node, concat)
+
+    return pattern
diff --git a/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py b/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py
@@ -49,6 +49,7 @@
 from nncf.onnx.graph.transformations.command_creation import ONNXCommandCreator
 from nncf.onnx.graph.transformations.commands import ONNXTargetPoint
 from nncf.onnx.quantization.ignored_patterns import create_rope
+from nncf.onnx.quantization.ignored_patterns import create_sam_pe
 from nncf.parameters import CompressionFormat
 from nncf.parameters import CompressWeightsMode
 from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
@@ -491,7 +492,9 @@ def _replace_matmul_with_matmulnbits(
 
     @staticmethod
     def get_ignored_patterns() -> GraphPattern:
-        return create_rope()
+        pattern = create_rope()
+        pattern.add_pattern_alternative(create_sam_pe())
+        return pattern
 
 
 class ONNXAWQAlgoAlgoBackend(AWQAlgoBackend, ONNXWeightCompressionAlgoBackend):
diff --git a/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py
@@ -43,6 +43,7 @@
 from nncf.openvino.optimized_functions import clear_ov_model_cache
 from nncf.openvino.optimized_functions.models import OV_MODEL_CACHE
 from nncf.openvino.quantization.ignored_patterns import create_rope
+from nncf.openvino.quantization.ignored_patterns import create_sam_pe
 from nncf.openvino.rt_info import dump_parameters
 from nncf.openvino.statistics.collectors import OVMaxVarianceReducer
 from nncf.openvino.statistics.collectors import OVMeanAbsMaxReducer
@@ -394,7 +395,9 @@ def filter_func(point: StatisticPoint) -> bool:
 
     @staticmethod
     def get_ignored_patterns() -> GraphPattern:
-        return create_rope()
+        pattern = create_rope()
+        pattern.add_pattern_alternative(create_sam_pe())
+        return pattern
 
 
 class OVTensorWeightCompressionAlgoBackend(OVWeightCompressionAlgoBackend):
diff --git a/src/nncf/quantization/algorithms/weight_compression/torch_backend.py b/src/nncf/quantization/algorithms/weight_compression/torch_backend.py
@@ -62,6 +62,7 @@
 from nncf.torch.model_transformer import PTModelTransformer
 from nncf.torch.nncf_network import NNCFNetwork
 from nncf.torch.quantization.ignored_patterns import create_rope
+from nncf.torch.quantization.ignored_patterns import create_sam_pe
 from nncf.torch.quantization.layers import QUANTIZATION_MODULES
 from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor
 from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor
@@ -481,7 +482,9 @@ def transform_model(
 
     @staticmethod
     def get_ignored_patterns() -> GraphPattern:
-        return create_rope()
+        pattern = create_rope()
+        pattern.add_pattern_alternative(create_sam_pe())
+        return pattern
 
 
 class PTAWQAlgoAlgoBackend(AWQAlgoBackend, PTWeightCompressionAlgoBackend):
diff --git a/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py b/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py
@@ -57,6 +57,7 @@
 from nncf.torch.model_graph_manager import get_weight_compression_reduction_axes
 from nncf.torch.model_graph_manager import get_weight_tensor_port_ids
 from nncf.torch.quantization.ignored_patterns import create_rope
+from nncf.torch.quantization.ignored_patterns import create_sam_pe
 from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor
 from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor
 from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor
@@ -257,7 +258,9 @@ def transform_model(
 
     @staticmethod
     def get_ignored_patterns() -> GraphPattern:
-        return create_rope()
+        pattern = create_rope()
+        pattern.add_pattern_alternative(create_sam_pe())
+        return pattern
 
 
 class FXMixedPrecisionAlgoBackend(MixedPrecisionAlgoBackend, FXWeightCompressionAlgoBackend):
diff --git a/src/nncf/torch/quantization/ignored_patterns.py b/src/nncf/torch/quantization/ignored_patterns.py
@@ -250,3 +250,27 @@ def create_rope() -> GraphPattern:
     pattern.add_edge(concat_node, cos_node)
     pattern.add_edge(concat_node, sin_node)
     return pattern
+
+
+@PT_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
+def create_sam_pe() -> GraphPattern:
+    """
+    Positional Embedding from Segment Anything Model (SAM): matmul -> mul -> (cos, sin) -> cat.
+    """
+    pattern = GraphPattern()
+    # One pattern node per operation of the matched subgraph.
+    matmul_node = pattern.add_node(
+        **{GraphPattern.LABEL_ATTR: "MATMUL", GraphPattern.METATYPE_ATTR: om.PTMatMulMetatype}
+    )
+    mul_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "MULTIPLY", GraphPattern.METATYPE_ATTR: om.PTMulMetatype})
+    cos_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "COS", GraphPattern.METATYPE_ATTR: om.PTCosMetatype})
+    sin_node = pattern.add_node(**{GraphPattern.LABEL_ATTR: "SIN", GraphPattern.METATYPE_ATTR: om.PTSinMetatype})
+    concat = pattern.add_node(**{GraphPattern.LABEL_ATTR: "CONCAT", GraphPattern.METATYPE_ATTR: om.PTCatMetatype})
+    # matmul feeds mul; mul fans out to cos and sin; both branches are merged by cat.
+    pattern.add_edge(matmul_node, mul_node)
+    pattern.add_edge(mul_node, cos_node)
+    pattern.add_edge(mul_node, sin_node)
+    pattern.add_edge(cos_node, concat)
+    pattern.add_edge(sin_node, concat)
+
+    return pattern
diff --git a/tests/cross_fw/test_templates/helpers.py b/tests/cross_fw/test_templates/helpers.py
@@ -499,3 +499,24 @@ def forward(self, x):
         x1 = x.sin()
         x2 = x.cos()
         return x1, x2
+
+
+class SAMPEModel(nn.Module):
+    """
+    Positional Embedding from Segment Anything Model (SAM): matmul -> scale by 2*pi -> (sin, cos) -> cat.
+    """
+
+    INPUT_SIZE = [1, 2, 3, 2]  # the last dimension must equal weight.shape[0] for the matmul in forward()
+
+    def __init__(self):
+        super().__init__()
+        with set_torch_seed():
+            self.weight = nn.Parameter(torch.randn((2, 128)))  # randn: torch.empty() returns uninitialized memory
+
+    def forward(self, x):
+        x = torch.matmul(x, self.weight)
+        x = x * (2 * torch.pi)
+        x1 = x.sin()
+        x2 = x.cos()
+        x = torch.cat([x1, x2], dim=-1)  # the last dimension becomes 2 * weight.shape[1]
+        return x
diff --git a/tests/cross_fw/test_templates/template_test_weights_compression.py b/tests/cross_fw/test_templates/template_test_weights_compression.py
@@ -119,6 +119,11 @@ def get_matmul_model() -> TModel:
     def get_RoPE_model() -> TModel:
         """Returns a backend model for test_rope_weight_compression."""
 
+    @staticmethod
+    @abstractmethod
+    def get_SAM_PE_model() -> TModel:
+        """Returns a backend model with the SAM positional embedding for test_sam_pe_weight_compression."""
+
     @pytest.mark.parametrize(
         ("mode", "ref_act_score", "ref_score"),
         (
@@ -400,6 +405,26 @@ def test_rope_weight_compression(self):
         int4_num_nodes = self.get_num_int4_nodes(compressed_model)
         assert int4_num_nodes == int4_ref_num_compressed
 
+    def test_sam_pe_weight_compression(self):
+        model = self.get_SAM_PE_model()
+        # Calibration dataset: a single all-ones float32 sample of shape [1, 2, 3, 2].
+        dataset = Dataset(
+            [self.to_tensor(np.ones([1, 2, 3, 2], dtype=np.float32))],
+            self.get_transform_func(),
+        )
+        compressed_model = compress_weights(
+            model,
+            mode=CompressWeightsMode.INT4_SYM,
+            ratio=1.0,
+            group_size=-1,
+            dataset=dataset,
+            all_layers=True,
+        )
+        # The positional embedding weight is expected to be ignored, so no node may end up compressed to INT4.
+        int4_ref_num_compressed = 0
+        int4_num_nodes = self.get_num_int4_nodes(compressed_model)
+        assert int4_num_nodes == int4_ref_num_compressed
+
     @staticmethod
     @abstractmethod
     def get_reference_for_test_awq_scale_reference() -> dict[str, Tensor]:
diff --git a/tests/onnx/common.py b/tests/onnx/common.py
@@ -164,6 +164,27 @@ def add_add(self, input_a: str, input_b: str, output: Optional[str] = None) -> s
         )
         return output
 
+    def add_mul_const(
+        self, input: str, shape: tuple[int, ...], output: Optional[str] = None, data: Optional[np.ndarray] = None
+    ) -> str:
+        """
+        Adds a Mul node that multiplies `input` by a constant FLOAT initializer with dims `shape`.
+        The constant is `data` if given (random values otherwise); returns the name of the output tensor.
+        """
+        i = len(self._nodes)
+        w_name = f"W_{i}"
+        w_values = np.random.rand(*shape) if data is None else data
+        # The initializer is written as raw FLOAT bytes, so the buffer must really be float32.
+        w_values = np.asarray(w_values, dtype=np.float32)
+        w_initializer = onnx.helper.make_tensor(
+            name=w_name, data_type=onnx.TensorProto.FLOAT, dims=shape, vals=w_values.tobytes(), raw=True
+        )
+        self._initializers.append(w_initializer)
+        output = f"Mul_{i}_output" if output is None else output
+        mul_node = onnx.helper.make_node(op_type="Mul", inputs=[input, w_name], outputs=[output], name=f"Mul_{i}")
+        self._nodes.append(mul_node)
+        return output
+
     def add_relu(self, input: str, output: Optional[str] = None) -> str:
         i = len(self._nodes)
 
diff --git a/tests/onnx/quantization/test_weights_compression.py b/tests/onnx/quantization/test_weights_compression.py
@@ -395,6 +395,24 @@ def get_RoPE_model() -> onnx.ModelProto:
 
         return mb.build()
 
+    @staticmethod
+    def get_SAM_PE_model() -> onnx.ModelProto:
+        """
+        Builds a model to be used in the TemplateWeightCompression.test_sam_pe_weight_compression() test.
+        """
+        mb = ModelBuilder()
+        # input[..., 2] -> MatMul with a (2, 128) weight -> Mul by 2*pi -> (Sin, Cos) -> Concat(axis=-1)
+        x = mb.add_input("input", (-1, -1, -1, 2))
+        x = mb.add_matmul(x, shape=(2, 128))
+        x = mb.add_mul_const(x, shape=(1,), data=np.array([2 * np.pi], np.float32))
+        x1 = mb.add_sin(x)
+        x2 = mb.add_cos(x)
+        x = mb.add_concat([x1, x2], axis=-1)
+        # Concatenating the two 128-wide branches gives a last dimension of 256.
+        mb.add_output(x, (-1, -1, -1, 256))
+
+        return mb.build()
+
     @staticmethod
     def get_sequential_matmul_model() -> onnx.ModelProto:
         """
diff --git a/tests/openvino/native/models.py b/tests/openvino/native/models.py
@@ -1252,6 +1252,26 @@ def _create_ov_model(self):
         return model
 
 
+class SAMPEModel(OVReferenceModel):
+    """
+    Positional Embedding from Segment Anything Model (SAM): MatMul -> Multiply -> (Sin, Cos) -> Concat.
+    """
+
+    def _create_ov_model(self):
+        inp = opset.parameter([-1, -1, -1, 2], name="inp")
+        matmul_data = self._rng.random((128, 2)).astype(np.float32)
+        # The (128, 2) weight is consumed transposed (transpose_b=True); the product is then scaled by 2*pi.
+        matmul = opset.matmul(inp, matmul_data, transpose_a=False, transpose_b=True, name="MatMul")
+        scaled_matmul = opset.multiply(matmul, opset.constant(2 * np.pi, dtype=np.float32), name="Scaled_MatMul")
+        sin = opset.sin(scaled_matmul, name="sin")
+        cos = opset.cos(scaled_matmul, name="cos")
+        concat = opset.concat([sin, cos], axis=-1, name="concat")
+        concat_result = opset.result(concat, name="concat_result")
+
+        model = ov.Model([concat_result], [inp])
+        return model
+
+
 class MatMul(OVReferenceModel):
     def _create_ov_model(self):
         input_node = opset.parameter([1, 4, 8], name="Input")
diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py
@@ -74,6 +74,7 @@
 from tests.openvino.native.models import ModelNamedConsts
 from tests.openvino.native.models import OVReferenceModel
 from tests.openvino.native.models import RoPEModel
+from tests.openvino.native.models import SAMPEModel
 from tests.openvino.native.models import SequentialMatmulModel
 from tests.openvino.native.models import WeightsModel
 from tests.openvino.native.quantization.test_fq_params_calculation import REFERENCE_SCALES_DIR
@@ -1929,6 +1930,10 @@ def get_matmul_model() -> ov.Model:
     def get_RoPE_model() -> ov.Model:
         return RoPEModel().ov_model
 
+    @staticmethod
+    def get_SAM_PE_model() -> ov.Model:
+        return SAMPEModel().ov_model  # a new SAMPEModel is constructed on every call
+
     @staticmethod
     def get_sequential_matmul_model() -> ov.Model:
         return SequentialMatmulModel().ov_model
diff --git a/tests/torch2/function_hook/quantization/test_weights_compression.py b/tests/torch2/function_hook/quantization/test_weights_compression.py
@@ -41,6 +41,7 @@
 from nncf.torch.quantization.quantize_functions import unpack_int4
 from nncf.torch.quantization.quantize_functions import unpack_uint4
 from tests.cross_fw.test_templates.helpers import RoPEModel
+from tests.cross_fw.test_templates.helpers import SAMPEModel
 from tests.cross_fw.test_templates.template_test_weights_compression import TemplateWeightCompression
 from tests.torch.test_models.synthetic import ShortTransformer
 from tests.torch.test_tensor import cast_to
@@ -480,6 +481,10 @@ def get_matmul_model() -> torch.nn.Module:
     def get_RoPE_model() -> torch.nn.Module:
         return RoPEModel()
 
+    @staticmethod
+    def get_SAM_PE_model() -> torch.nn.Module:
+        return SAMPEModel()  # a new SAMPEModel is constructed on every call
+
     @staticmethod
     def get_sequential_matmul_model() -> torch.nn.Module:
         return SequentialMatmulModel()
diff --git a/tests/torch2/fx/test_compress_weights.py b/tests/torch2/fx/test_compress_weights.py
@@ -29,6 +29,7 @@
 from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor
 from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor
 from tests.cross_fw.test_templates.helpers import RoPEModel
+from tests.cross_fw.test_templates.helpers import SAMPEModel
 from tests.cross_fw.test_templates.template_test_weights_compression import TemplateWeightCompression
 from tests.torch.test_models.synthetic import ShortTransformer
 from tests.torch.test_tensor import cast_to
@@ -329,6 +330,13 @@ def get_RoPE_model() -> torch.fx.GraphModule:
         exported_model = get_torch_fx_model(model, ex_input)
         return exported_model
 
+    @staticmethod
+    def get_SAM_PE_model() -> torch.fx.GraphModule:
+        model = SAMPEModel()
+        ex_input = torch.ones(SAMPEModel.INPUT_SIZE, dtype=torch.float32)  # all-ones example input
+        exported_model = get_torch_fx_model(model, ex_input)
+        return exported_model
+
     @staticmethod
     def get_sequential_matmul_model() -> torch.fx.GraphModule:
         model = SequentialMatmulModel()