Update on "[Executorch] Add quantized kv cache to oss ci"

kimishpatel · kimishpatel · commit b015d8023ae7 · 2024-12-04T18:28:44.000-08:00
Fixes to make sure the quantized KV cache works in OSS. Differential Revision: [D66269487](https://our.internmc.facebook.com/intern/diff/D66269487/) [ghstack-poisoned]
diff --git a/examples/models/llama/source_transformation/quantized_kv_cache.py b/examples/models/llama/source_transformation/quantized_kv_cache.py
@@ -249,6 +249,8 @@ def from_float(cls, kv_cache, cache_type: QuantizedCacheType):
 def replace_kv_cache_with_quantized_kv_cache(module):
     # This is needed to ensure that custom ops are registered
     from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
+    from executorch.extension.llm.custom_ops import custom_ops  # noqa: F401
+
     logging.warning(
         "Replacing KVCache with QuantizedKVCache. This modifies the model in place."
     )

Original file line number	Diff line number	Diff line change
`@@ -249,6 +249,8 @@ def from_float(cls, kv_cache, cache_type: QuantizedCacheType):`
`249`	`249`	`def replace_kv_cache_with_quantized_kv_cache(module):`
`250`	`250`	`# This is needed to ensure that custom ops are registered`
`251`	`251`	`from executorch.extension.pybindings import portable_lib # noqa # usort: skip`
	`252`	`+ from executorch.extension.llm.custom_ops import custom_ops # noqa: F401`
	`253`	`+`
`252`	`254`	`logging.warning(`
`253`	`255`	`"Replacing KVCache with QuantizedKVCache. This modifies the model in place."`
`254`	`256`	`)`