diff --git a/examples/converters/gguf2pte.py b/examples/converters/gguf2pte.py
new file mode 100644
index 00000000000..74fbc171c9d
--- /dev/null
+++ b/examples/converters/gguf2pte.py
@@ -0,0 +1,39 @@
+'''
+Example: convert a .gguf model into ExecuTorch's .pte format.
+
+1. Load the GGUF model via transformers
+2. torch.export
+3. ExecuTorch lowering and export to .pte
+'''
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from executorch.exir import to_edge_transform_and_lower
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+from torch.export import export
+
+model_id = "bartowski/SmolLM2-135M-Instruct-GGUF"  # HF repo hosting the GGUF model we wish to convert
+filename = "SmolLM2-135M-Instruct-Q8_0.gguf"
+
+# transformers dequantizes the GGUF weights while loading
+tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
+model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
+print(f"Model weights dtype: {model.dtype}")
+model.eval()
+
+# Tokenize a sample prompt to serve as example inputs for torch.export
+sample_inputs = tokenizer("Plants create energy through a process known as", return_tensors="pt")
+print(sample_inputs)
+print(sample_inputs["input_ids"].shape)
+print(sample_inputs["attention_mask"].shape)
+
+sample_inputs = (sample_inputs["input_ids"], sample_inputs["attention_mask"])
+
+# torch.export, then ExecuTorch lowering; the XNNPACK partitioner delegates
+# supported subgraphs to the XNNPACK backend
+exported_program = export(model, sample_inputs)
+executorch_program = to_edge_transform_and_lower(
+    exported_program,
+    partitioner=[XnnpackPartitioner()],
+).to_executorch()
+
+with open("model.pte", "wb") as file:
+    file.write(executorch_program.buffer)
diff --git a/examples/converters/requirements.txt b/examples/converters/requirements.txt
new file mode 100644
index 00000000000..9e2ead3a5b0
--- /dev/null
+++ b/examples/converters/requirements.txt
@@ -0,0 +1,6 @@
+accelerate
+gguf
+setuptools
+transformers
+executorch
+torch
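
Note: a quick way to sanity-check the exported model.pte is ExecuTorch's Python runtime. The sketch below is illustrative only and not part of the diff; it assumes the executorch pip package exposes executorch.runtime.Runtime with load_program / load_method / execute as in recent releases. Because export() above was called without dynamic_shapes, the .pte is specialized to the sample input shapes, so we re-tokenize the same prompt to match.

    # sanity_check_pte.py -- illustrative sketch, not part of this diff.
    # Assumes executorch.runtime.Runtime is available (recent executorch releases).
    from transformers import AutoTokenizer
    from executorch.runtime import Runtime

    model_id = "bartowski/SmolLM2-135M-Instruct-GGUF"
    filename = "SmolLM2-135M-Instruct-Q8_0.gguf"

    tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
    # Same prompt as at export time, so the input shapes match the specialized graph
    inputs = tokenizer("Plants create energy through a process known as", return_tensors="pt")

    runtime = Runtime.get()  # singleton entry point to the ExecuTorch runtime
    program = runtime.load_program("model.pte")
    method = program.load_method("forward")
    # Positional inputs must match the export-time tuple: (input_ids, attention_mask)
    outputs = method.execute([inputs["input_ids"], inputs["attention_mask"]])
    print(outputs[0].shape)  # the first output should be the logits: (1, seq_len, vocab_size)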