Commit b95fea7

Update on "[Executorch] Add quantized kv cache to oss ci"

Fixes to make sure the quantized KV cache works in OSS.

Differential Revision: [D66269487](https://our.internmc.facebook.com/intern/diff/D66269487/)
[ghstack-poisoned]

2 parents c984a6e + da024a1

1 file changed, 2 insertions(+), 2 deletions(-)

examples/models/llama/source_transformation/quantized_kv_cache.py

Lines changed: 2 additions & 2 deletions
@@ -11,8 +11,6 @@
 import torch.nn as nn
 from executorch.examples.models.llama.llama_transformer import KVCache
 
-# This is needed to ensure that custom ops are registered
-from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
 from torch.ao.quantization.fx._decomposed import quantized_decomposed_lib  # noqa: F401
 
 
@@ -235,6 +233,8 @@ def from_float(cls, kv_cache, cache_type: QuantizedCacheType):
 
 
 def replace_kv_cache_with_quantized_kv_cache(module):
+    # This is needed to ensure that custom ops are registered
+    from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
     from executorch.extension.llm.custom_ops import custom_ops  # noqa: F401
 
     logging.warning(
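The change above moves the op-registering import from module scope into the function that needs it, so importing the file no longer requires the pybindings extension to be present. A minimal sketch of that deferred-import pattern, with a stdlib module standing in for the real extension and all names illustrative rather than the actual ExecuTorch API:

```python
def apply_quantized_kv_cache_transform(module):
    """Sketch of the deferred-import pattern used in this commit.

    Importing the op-registration library inside the function, rather than
    at module scope, means merely importing this file never pulls in the
    extension; the dependency is only resolved when the transform runs.
    """
    try:
        # Stand-in for: from executorch.extension.pybindings import portable_lib
        # The import's side effect (registering custom ops) would happen here.
        import sqlite3 as _portable_lib  # noqa: F401  (stdlib stand-in)
    except ImportError as e:
        raise RuntimeError(
            "custom-op extension is required for this transformation"
        ) from e
    # A real implementation would now replace KVCache instances in `module`.
    return module
```

The design choice mirrors the diff: environments that only import the module (e.g. OSS CI without the extension built) stay importable, while callers of the transform still get the ops registered before they are needed.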
