Update on "[Executorch] Add quantized kv cache to oss ci"

kimishpatel · kimishpatel · commit 6f1efc5fa10d · 2024-12-04T17:04:59.000-08:00
Fixes to make sure the quantized KV cache works in OSS. Differential Revision: [D66269487](https://our.internmc.facebook.com/intern/diff/D66269487/) [ghstack-poisoned]
diff --git a/examples/models/llama/source_transformation/quantized_kv_cache.py b/examples/models/llama/source_transformation/quantized_kv_cache.py
@@ -18,22 +18,7 @@
     op = torch.ops.quantized_decomposed.quantize_per_token.out
     assert op is not None
 except:
-    import glob
-
-    import executorch
-
-    # Ideally package is installed in only one location but usage of
-    # PYATHONPATH can result in multiple locations.
-    # ATM this is mainly used in CI for qnn runner. Will need to revisit this
-    executorch_package_path = executorch.__path__[-1]
-    libs = list(
-        glob.glob(
-            f"{executorch_package_path}/**/libquantized_ops_aot_lib.*", recursive=True
-        )
-    )
-    assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
-    logging.info(f"Loading custom ops library: {libs[0]}")
-    torch.ops.load_library(libs[0])
+    import executorch.kernels.quantized  # noqa: F401
     op = torch.ops.quantized_decomposed.quantize_per_token.out
     assert op is not None
 
diff --git a/kernels/quantized/__init__.py b/kernels/quantized/__init__.py
@@ -4,16 +4,26 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import logging
+
 try:
-    from pathlib import Path
+    import glob
 
-    libs = list(Path(__file__).parent.resolve().glob("**/libquantized_ops_aot_lib.*"))
-    del Path
-    assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
-    import torch as _torch
+    import torch
+    import executorch
 
-    _torch.ops.load_library(libs[0])
-    del _torch
+    # Ideally the package is installed in only one location, but use of
+    # PYTHONPATH can result in multiple locations.
+    # ATM this is mainly used in CI for the qnn runner. Will need to revisit this.
+    executorch_package_path = executorch.__path__[-1]
+    libs = list(
+        glob.glob(
+            f"{executorch_package_path}/**/libquantized_ops_aot_lib.*", recursive=True
+        )
+    )
+    assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
+    logging.info(f"Loading custom ops library: {libs[0]}")
+    torch.ops.load_library(libs[0])
 except:
     import logging