Enable TinyLLAMAs quantization (#151)

malfet · web-flow · commit 7d4527002e70 · 2024-04-05T11:20:21.000-07:00
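Copy the checkpoint-unwrapping code from commit 11ce176 into `quantize.py`: when the loaded checkpoint contains a "model" key and the checkpoint path contains "stories", load the state dict from `checkpoint["model"]` instead of the top-level object.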
diff --git a/quantize.py b/quantize.py
@@ -552,6 +552,8 @@ def quantize(
         model = Transformer.from_name(checkpoint_path.parent.name)
 
     checkpoint = torch.load(str(checkpoint_path), mmap=True, weights_only=True)
+    if "model" in checkpoint and "stories" in str(checkpoint_path):
+        checkpoint = checkpoint["model"]
     model.load_state_dict(checkpoint, assign=True)
     model = model.to(dtype=precision, device=device)