Update on "[Executorch] Add quantized kv cache to oss ci"

kimishpatel · kimishpatel · commit e49b3adc5c77 · 2024-12-04T18:09:26.000-08:00
Fixes to make sure quantized kv cache works in oss. Differential Revision: [D66269487](https://our.internmc.facebook.com/intern/diff/D66269487/) [ghstack-poisoned]
diff --git a/examples/models/llama/source_transformation/quantized_kv_cache.py b/examples/models/llama/source_transformation/quantized_kv_cache.py
@@ -18,7 +18,24 @@
     op = torch.ops.quantized_decomposed.quantize_per_token.out
     assert op is not None
 except:
-    import executorch.kernels.quantized  # noqa: F401
+    import glob
+
+    import executorch
+
+    from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
+
+    # Ideally package is installed in only one location but usage of
+    # PYTHONPATH can result in multiple locations.
+    # ATM this is mainly used in CI for qnn runner. Will need to revisit this
+    executorch_package_path = executorch.__path__[-1]
+    libs = list(
+        glob.glob(
+            f"{executorch_package_path}/**/libquantized_ops_aot_lib.*", recursive=True
+        )
+    )
+    assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
+    logging.info(f"Loading custom ops library: {libs[0]}")
+    torch.ops.load_library(libs[0])
     op = torch.ops.quantized_decomposed.quantize_per_token.out
     assert op is not None
 
@@ -230,8 +247,8 @@ def from_float(cls, kv_cache, cache_type: QuantizedCacheType):
 
 
 def replace_kv_cache_with_quantized_kv_cache(module):
-    from executorch.extension.llm.custom_ops import custom_ops  # noqa: F401
-
+    # This is needed to ensure that custom ops are registered
+    from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
     logging.warning(
         "Replacing KVCache with QuantizedKVCache. This modifies the model in place."
     )
diff --git a/kernels/quantized/__init__.py b/kernels/quantized/__init__.py
@@ -4,25 +4,14 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import logging
-
 try:
-    import glob
+    from pathlib import Path
 
+    libs = list(Path(__file__).parent.resolve().glob("**/libquantized_ops_aot_lib.*"))
+    del Path
+    assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
     import torch as _torch
-    import executorch
 
-    # Ideally package is installed in only one location but usage of
-    # PYATHONPATH can result in multiple locations.
-    # ATM this is mainly used in CI for qnn runner. Will need to revisit this
-    executorch_package_path = executorch.__path__[-1]
-    libs = list(
-        glob.glob(
-            f"{executorch_package_path}/**/libquantized_ops_aot_lib.*", recursive=True
-        )
-    )
-    assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
-    logging.info(f"Loading custom ops library: {libs[0]}")
     _torch.ops.load_library(libs[0])
     del _torch
 except: