4 files changed: +11 −7 lines.

File 1 of 4:

+gguf
File 2 of 4:

 from shark_turbine.aot import *

-from ..layers import *
+from turbine_llm.layers import *
+from turbine_llm.types import *

 # TODO: Should be using a base class with the protocol supported.
-from ..models.llama.llama import PagedLlamaModelV1
+from ..models.llama.llama import LlamaModelConfig, PagedLlamaModelV1


 def main():
@@ -24,10 +25,10 @@ def main():
     args = cli.parse(parser)

     data_files = cli.get_gguf_data_files(args)
-    dataset = gguf.load_file(data_files["gguf"])
+    dataset = gguf_interop.load_file(data_files["gguf"])

     hp = configs.LlamaHParams.from_gguf_props(dataset.properties)
-    model = PagedLlamaModelV1(dataset.root_theta, hp)
+    model = PagedLlamaModelV1(dataset.root_theta, LlamaModelConfig(hp))

     # Unrolling cache updates by batch row makes dynamo sad without an
     # override. There may be a better way to do this.
File 3 of 4:

 import torch

 from turbine_llm.layers import *
+from turbine_llm.types import *
 from turbine_llm.models.llama.llama_ref import *


 def main(args: list[str]):
     torch.no_grad().__enter__()
-    config = gguf.load_file(args[0])
+    config = gguf_interop.load_file(args[0])
     hp = configs.LlamaHParams.from_gguf_props(config.properties)
     model = DirectCacheLlamaModelV1(config.root_theta, hp)
File 4 of 4:

 import torch

 from turbine_llm.layers import *
+from turbine_llm.types import *
 from turbine_llm.models.llama.llama import *


 def main(args: list[str]):
     torch.no_grad().__enter__()
-    config = gguf.load_file(args[0])
+    config = gguf_interop.load_file(args[0])
     hp = configs.LlamaHParams.from_gguf_props(config.properties)
-    model = PagedLlamaModelV1(config.root_theta, hp)
+    model = PagedLlamaModelV1(config.root_theta, LlamaModelConfig(hp))
     cache_state = model.cache.paged.allocate(128, torch.float32)
     start_index = 0
     next_batch = torch.tensor(
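Across all three scripts the migration is mechanical, so for any downstream code still on the old API, a hedged before/after sketch (names exactly as in the diff; everything else is illustrative glue):

    # Before (as removed by this diff):
    #   config = gguf.load_file(args[0])
    #   model = PagedLlamaModelV1(config.root_theta, hp)

    # After: the GGUF loader is addressed as gguf_interop, and the model
    # takes a LlamaModelConfig wrapping the parsed hyperparameters.
    config = gguf_interop.load_file(args[0])
    hp = configs.LlamaHParams.from_gguf_props(config.properties)
    model = PagedLlamaModelV1(config.root_theta, LlamaModelConfig(hp))

    # Paged KV-cache allocation is untouched by this change: 128 pages
    # in fp32, as in the example above.
    cache_state = model.cache.paged.allocate(128, torch.float32)

Note that the reference model keeps taking bare LlamaHParams (DirectCacheLlamaModelV1 in file 3 is an unchanged context line); only the paged model gained the config wrapper in this change.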