From cc3f25017ba16773b7255895fe04a775b7ae235a Mon Sep 17 00:00:00 2001
From: Matthias Cremon <matthiascremon@meta.com>
Date: Sat, 4 Jan 2025 06:57:10 -0800
Subject: [PATCH] Call eval() in `quantize_pt2`

Summary: Call `model.eval()` at the top of `quantize_pt2` so that ALL calls going through it run in eval mode. In a subsequent diff, all calls will go through `quantize_pt2`, including fp32 cases, which will use a no-op quantizer; that will allow further cleanup of the flow.

Differential Revision: D67561642
---
 backends/cadence/aot/compiler.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index 5ddcfcf414b..c19a4296f6a 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -131,7 +131,10 @@ def quantize_pt2(
     Prepare, convert and fuse the model using the given quantizer.
     Returns a GraphModule with the quantized model.
     """
-    # Quantizer
+    # Put the model in inference mode by calling model.eval()
+    model.eval()
+
+    # Instantiate the quantizer to CadenceQuantizer if not supplied
     if not quantizer:
         quantizer = CadenceQuantizer()