Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
808 changes: 553 additions & 255 deletions convert-dense.py

Large diffs are not rendered by default.

28 changes: 17 additions & 11 deletions convert-hf-to-powerinfer-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ def __init__(
self.hparams = Model.load_hparams(self.dir_model)
self.model_arch = self._get_model_architecture()
self.gguf_writer = gguf.GGUFWriter(
fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file = False
fname_out,
gguf.MODEL_ARCH_NAMES[self.model_arch],
endianess=self.endianess,
use_temp_file=False,
)

def set_vocab(self):
Expand Down Expand Up @@ -517,6 +520,7 @@ def write_tensors(self):

self.gguf_writer.add_tensor(new_name, data)


class OptModel(Model):
def set_gguf_parameters(self, params: PredictorParams):
self.gguf_writer.add_name("opt")
Expand All @@ -527,20 +531,20 @@ def set_gguf_parameters(self, params: PredictorParams):
self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
# self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
self.gguf_writer.add_file_type(self.ftype)

if params.sparse_threshold is not None:
self.gguf_writer.add_sparse_threshold(params.sparse_threshold)

def write_tensors(self):
for name, data_torch in self.get_tensors():
old_dtype = data_torch.dtype

# convert any unsupported data types to float32
if data_torch.dtype not in (torch.float16, torch.float32):
data_torch = data_torch.to(torch.float32)

data = data_torch.squeeze().numpy()

# map tensor names
new_name = self._translate_tensor_key(name)
if new_name is None:
Expand All @@ -552,8 +556,8 @@ def write_tensors(self):
if "ffn_down" in new_name:
new_name = new_name.replace("ffn_down", "ffn_down_t")
data = data.T
n_dims = len(data.shape)

n_dims = len(data.shape)
data_dtype = data.dtype

# if f32 desired, convert any float16 to float32
Expand All @@ -570,11 +574,12 @@ def write_tensors(self):
and n_dims == 2
):
data = data.astype(np.float16)

print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")

self.gguf_writer.add_tensor(new_name, data)


@dataclass
class PredictorParams:
sparse_threshold: float | None = None
Expand All @@ -583,12 +588,12 @@ class PredictorParams:
def loadPredictorJson(config_path: Path) -> PredictorParams:
    """Build a PredictorParams from the JSON file at ``config_path``.

    Only the ``"sparse_threshold"`` key is read; if the key is absent,
    ``dict.get`` yields ``None`` for that field.
    """
    # Use a context manager so the file handle is closed as soon as parsing
    # finishes, instead of leaving it open until garbage collection.
    with open(config_path) as config_file:
        config = json.load(config_file)
    return PredictorParams(
        sparse_threshold=config.get("sparse_threshold"),
    )

@staticmethod
def load(model_instance: Model) -> PredictorParams:
config_path = model_instance.dir_mlp_pred / "config.json"
config_path = model_instance.dir_mlp_pred / "config.json"

if config_path.exists():
params = PredictorParams.loadPredictorJson(config_path)
Expand All @@ -597,6 +602,7 @@ def load(model_instance: Model) -> PredictorParams:

return params


###### CONVERSION LOGIC ######


Expand Down
Loading