 parser.add_argument('--checkpoint_path', type=Path, default=Path("checkpoints/meta-llama/Llama-2-7b-chat-hf/model.pth"), help='Path to the model checkpoint to be quantized.')
 parser.add_argument('--mode', '-q', type=str, default='int8', choices=['int8', 'int4', 'int4-gptq'], help='type of quantization to perform')
 parser.add_argument('--groupsize', type=int, default=32, help='Group size for int4 quantization.')
-parser.add_argument('--calibration_tasks', type=str, nargs='+', default=['hellaswag'], help='tasks to do gptq calibration on, if doing gptq')
+parser.add_argument('--calibration_tasks', type=str, nargs='+', default=['wikitext'], help='tasks to do gptq calibration on, if doing gptq')
 parser.add_argument('--calibration_limit', type=int, default=1000, help='number of samples to use for gptq calibration')
 parser.add_argument('--calibration_seq_length', type=int, default=100, help='length of sequences to use for gptq calibration')
 parser.add_argument('--pad_calibration_inputs', type=bool, default=False, help='pads sequences shorter than calibration_seq_length to that length, yielding more calibration inputs but running much slower')
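For context, below is a minimal, self-contained sketch of the parser this hunk modifies, with the new 'wikitext' default in place. Only the add_argument calls come from the diff; the ArgumentParser construction, the str_to_bool helper, and the __main__ demo are illustrative assumptions. The sketch also swaps type=bool for an explicit string parser, since argparse's type=bool coerces any non-empty string (including "False") to True.

import argparse
from pathlib import Path

def str_to_bool(value: str) -> bool:
    # Illustrative replacement for the diff's type=bool, which would
    # treat any non-empty string (even "False") as True.
    return value.lower() in ('true', '1', 'yes')

# The description string is assumed; it is not part of the diff.
parser = argparse.ArgumentParser(description='Quantize a model checkpoint.')
parser.add_argument('--checkpoint_path', type=Path,
                    default=Path("checkpoints/meta-llama/Llama-2-7b-chat-hf/model.pth"),
                    help='Path to the model checkpoint to be quantized.')
parser.add_argument('--mode', '-q', type=str, default='int8',
                    choices=['int8', 'int4', 'int4-gptq'],
                    help='type of quantization to perform')
parser.add_argument('--groupsize', type=int, default=32,
                    help='Group size for int4 quantization.')
parser.add_argument('--calibration_tasks', type=str, nargs='+', default=['wikitext'],
                    help='tasks to do gptq calibration on, if doing gptq')
parser.add_argument('--calibration_limit', type=int, default=1000,
                    help='number of samples to use for gptq calibration')
parser.add_argument('--calibration_seq_length', type=int, default=100,
                    help='length of sequences to use for gptq calibration')
parser.add_argument('--pad_calibration_inputs', type=str_to_bool, default=False,
                    help='pads sequences shorter than calibration_seq_length to that length, '
                         'yielding more calibration inputs but running much slower')

if __name__ == '__main__':
    # Example invocation: GPTQ int4 with the new wikitext calibration default.
    args = parser.parse_args(['--mode', 'int4-gptq', '--calibration_limit', '100'])
    print(args.calibration_tasks)  # ['wikitext']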