
Commit fc4d0b6

JakeStevens authored and facebook-github-bot committed
Remove SoftmaxQuantizer (#14089)

Summary: Pull Request resolved: #14089. Softmax is not supported on current platforms, so quantizing it only introduces quantize/dequantize pairs into the graph that serve no purpose. This diff removes the quantizer until softmax is supported. Differential Revision: D81964057
1 parent 245630a commit fc4d0b6
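
For context on the unneeded ops: annotating softmax for post-training quantization makes convert_pt2e wrap it in a dequantize/quantize pair, even though no current platform executes softmax in int8. Below is a minimal sketch of that round-trip, not part of the commit, using the fixed qparams asserted by the removed test further down (op names as registered in torch's quantized_decomposed namespace):

    import torch
    import torch.ao.quantization.fx._decomposed  # noqa: F401  registers the quantized_decomposed ops

    # Fixed qparams for an op whose outputs lie in [0, 1), as asserted by the
    # removed test_quantizer_softmax below.
    scale, zero_point = 1.0 / 256.0, -128

    x = torch.softmax(torch.randn(1, 10), dim=-1)
    q = torch.ops.quantized_decomposed.quantize_per_tensor.default(
        x, scale, zero_point, -128, 127, torch.int8
    )
    dq = torch.ops.quantized_decomposed.dequantize_per_tensor.default(
        q, scale, zero_point, -128, 127, torch.int8
    )
    # With no platform able to run softmax quantized, this q/dq pair is pure
    # graph overhead, which is what this commit eliminates.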

File tree

5 files changed (+6, -44 lines)


backends/nxp/quantizer/neutron_quantizer.py

Lines changed: 0 additions & 2 deletions

@@ -35,7 +35,6 @@
     ReshapePattern,
     SharedSpecPattern,
     SigmoidPattern,
-    SoftMaxPattern,
     TanhInPlacePattern,
     TanhPattern,
     ViewPattern,
@@ -225,7 +224,6 @@ def __init__(self):
             NeutronAtenQuantizer(ReluInPlacePattern(), static_qconfig),
             NeutronAtenQuantizer(ReshapePattern(), static_qconfig),
             NeutronAtenQuantizer(SigmoidPattern(), static_qconfig),
-            NeutronAtenQuantizer(SoftMaxPattern(), static_qconfig),
             NeutronAtenQuantizer(TanhPattern(), static_qconfig),
             NeutronAtenQuantizer(TanhInPlacePattern(), static_qconfig),
             NeutronAtenQuantizer(ViewPattern(), static_qconfig),
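
If a future platform gains quantized-softmax support, re-enabling is the inverse of this hunk. A sketch, taken verbatim from the two removed lines (static_qconfig is the config already defined in this module):

    # In the pattern imports at the top of neutron_quantizer.py:
    SoftMaxPattern,

    # In NeutronQuantizer.__init__, alongside the other per-op quantizers:
    NeutronAtenQuantizer(SoftMaxPattern(), static_qconfig),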

backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py

Lines changed: 3 additions & 2 deletions

@@ -59,12 +59,13 @@ def test_remove_io_quant_ops_pass__cifarnet():
     )
 
     nodes = list(exec_prog.exported_program().graph.nodes)
-    assert len(nodes) == 11
+    assert len(nodes) == 9
     assert (
         nodes[0].meta["val"].dtype == torch.int8
     ), "Input tensor doesn't have type INT8."
+    # Currently, softmax is not quantized
     assert (
-        nodes[10].meta["val"][0].dtype == torch.int8
+        nodes[8].meta["val"][0].dtype == torch.float32
     ), "Output tensor doesn't have type INT8."
 
     assert (

backends/nxp/tests/test_edge_passes.py

Lines changed: 1 addition & 1 deletion

@@ -72,7 +72,7 @@ def unsupported_target(*_):
     exported_program = epm.exported_program()
 
     nodes = list(exported_program.graph_module.graph.nodes)
-    assert len(nodes) == 28
+    assert len(nodes) == 26
 
     view_copy_indices = _find_view_copy_node_indices(nodes)

backends/nxp/tests/test_integration.py

Lines changed: 2 additions & 2 deletions

@@ -27,7 +27,7 @@ def test_conv_fc_softmax__to_executorch_program():
 
     delegation_info = get_delegation_info(program.graph_module)
     assert delegation_info.num_delegated_subgraphs == 1
-    assert delegation_info.num_non_delegated_nodes == 11
+    assert delegation_info.num_non_delegated_nodes == 7
     assert delegation_info.num_delegated_nodes == 13
 
     for node in program.graph.nodes:
@@ -43,7 +43,7 @@ def test_cifarnet():
 
     delegation_info = get_delegation_info(exec_prog.exported_program().graph_module)
     assert delegation_info.num_delegated_subgraphs == 1
-    assert delegation_info.num_non_delegated_nodes == 11
+    assert delegation_info.num_non_delegated_nodes == 7
     assert delegation_info.num_delegated_nodes == 45
 
     nodes = list(exec_prog.exported_program().graph.nodes)

backends/nxp/tests/test_quantizer.py

Lines changed: 0 additions & 37 deletions

@@ -131,43 +131,6 @@ def test_quantizer_maxpool2d():
     assert input_quant == output_quant
 
 
-def test_quantizer_softmax():
-    model = models.SoftmaxModule(dim=0)
-    model.eval()
-
-    example_input = (torch.ones(1, 10),)
-    quantizer = NeutronQuantizer()
-    graph_module = torch.export.export(model, example_input, strict=True).module()
-
-    # noinspection PyTypeChecker
-    m = prepare_pt2e(graph_module, quantizer)
-    m(*example_input)
-    m = convert_pt2e(m)
-
-    # Dry run
-    m(*example_input)
-
-    nodes = list(m.graph.nodes)
-    assert len(nodes) == 7
-    # Check if QDQ pattern:
-    assert nodes[3].name == "softmax"
-    assert (
-        _get_target_name(nodes[3].args[0])
-        == "torch.ops.quantized_decomposed.dequantize_per_tensor.default"
-    )
-    assert (
-        _get_target_name(nodes[4])
-        == "torch.ops.quantized_decomposed.quantize_per_tensor.default"
-    )
-    assert nodes[4].args[0].name == "softmax"
-
-    # Check output quantization
-    scale, zp, _, _, dtype = nodes[4].args[1:]
-    assert scale == 1.0 / 256.0
-    assert zp == -128
-    assert dtype == torch.int8
-
-
 def test_quantizer_single_maxpool2d():
     model = models.MaxPool2dModule()
     model.eval()
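
The deleted test pinned softmax's output quantization to scale = 1/256 and zero_point = -128. Those are the natural fixed parameters for an int8 op whose outputs lie in [0, 1): zero maps to -128, and values approaching 1.0 saturate at 127. A small sketch of that affine mapping (the helper name is hypothetical):

    import torch

    SCALE, ZERO_POINT = 1.0 / 256.0, -128  # qparams the removed test asserted

    def quantize_softmax_int8(x: torch.Tensor) -> torch.Tensor:
        # q = round(x / scale) + zero_point, clamped to the int8 range
        return torch.clamp(torch.round(x / SCALE) + ZERO_POINT, -128, 127).to(torch.int8)

    probs = torch.softmax(torch.randn(1, 10), dim=-1)
    print(quantize_softmax_int8(probs))  # x = 0.0 -> -128; x = 1.0 would clamp 128 -> 127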
