From cc3f25017ba16773b7255895fe04a775b7ae235a Mon Sep 17 00:00:00 2001 From: Matthias Cremon Date: Sat, 4 Jan 2025 06:57:10 -0800 Subject: [PATCH] Call eval() in `quantize_pt2` Summary: This ensures that every model passed to `quantize_pt2` is put in eval mode. In a subsequent diff, all calls will go through `quantize_pt2`, including fp32 cases, which will use a no-op quantizer; that will allow further cleanup of the flow. Differential Revision: D67561642 --- backends/cadence/aot/compiler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py index 5ddcfcf414b..c19a4296f6a 100644 --- a/backends/cadence/aot/compiler.py +++ b/backends/cadence/aot/compiler.py @@ -131,7 +131,10 @@ def quantize_pt2( Prepare, convert and fuse the model using the given quantizer. Returns a GraphModule with the quantized model. """ - # Quantizer + # Make the model inference mode by calling model.eval() + model.eval() + + # Instantiate the quantizer to CadenceQuantizer if not supplied if not quantizer: quantizer = CadenceQuantizer()