4 files changed: +11 −7 lines.

File 1 of 4:

+gguf
File 2 of 4:

 from shark_turbine.aot import *

-from ..layers import *
+from turbine_llm.layers import *
+from turbine_llm.types import *

 # TODO: Should be using a base class with the protocol supported.
-from ..models.llama.llama import PagedLlamaModelV1
+from ..models.llama.llama import LlamaModelConfig, PagedLlamaModelV1


 def main():
@@ -24,10 +25,10 @@ def main():
     args = cli.parse(parser)

     data_files = cli.get_gguf_data_files(args)
-    dataset = gguf.load_file(data_files["gguf"])
+    dataset = gguf_interop.load_file(data_files["gguf"])

     hp = configs.LlamaHParams.from_gguf_props(dataset.properties)
-    model = PagedLlamaModelV1(dataset.root_theta, hp)
+    model = PagedLlamaModelV1(dataset.root_theta, LlamaModelConfig(hp))

     # Unrolling cache updates by batch row makes dynamo sad without an
     # override. There may be a better way to do this.
File 3 of 4:

 import torch

 from turbine_llm.layers import *
+from turbine_llm.types import *
 from turbine_llm.models.llama.llama_ref import *


 def main(args: list[str]):
     torch.no_grad().__enter__()
-    config = gguf.load_file(args[0])
+    config = gguf_interop.load_file(args[0])
     hp = configs.LlamaHParams.from_gguf_props(config.properties)
     model = DirectCacheLlamaModelV1(config.root_theta, hp)
File 4 of 4:

 import torch

 from turbine_llm.layers import *
+from turbine_llm.types import *
 from turbine_llm.models.llama.llama import *


 def main(args: list[str]):
     torch.no_grad().__enter__()
-    config = gguf.load_file(args[0])
+    config = gguf_interop.load_file(args[0])
     hp = configs.LlamaHParams.from_gguf_props(config.properties)
-    model = PagedLlamaModelV1(config.root_theta, hp)
+    model = PagedLlamaModelV1(config.root_theta, LlamaModelConfig(hp))
     cache_state = model.cache.paged.allocate(128, torch.float32)
     start_index = 0
     next_batch = torch.tensor(
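Across all three scripts the migration is mechanical, so for any downstream code still on the old API, a hedged before/after sketch (names exactly as in the diff; everything else is illustrative glue):

    # Before (as removed by this diff):
    #   config = gguf.load_file(args[0])
    #   model = PagedLlamaModelV1(config.root_theta, hp)

    # After: the GGUF loader is addressed as gguf_interop, and the model
    # takes a LlamaModelConfig wrapping the parsed hyperparameters.
    config = gguf_interop.load_file(args[0])
    hp = configs.LlamaHParams.from_gguf_props(config.properties)
    model = PagedLlamaModelV1(config.root_theta, LlamaModelConfig(hp))

    # Paged KV-cache allocation is untouched by this change: 128 pages
    # in fp32, as in the example above.
    cache_state = model.cache.paged.allocate(128, torch.float32)

Note that the reference model keeps taking bare LlamaHParams (DirectCacheLlamaModelV1 in file 3 is an unchanged context line); only the paged model gained the config wrapper in this change.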