Commit f44ef4e

Fixes for eval and GPTQ after move to gpt-fast
Summary: The move from simple_gpt to gpt-fast altered some things; this unbreaks eval and GPTQ. Note that GPTQ is still broken due to a kv cache issue in the model, which needs either non-public PyTorch functionality or a change to the GPTQ implementation; see the next PR in the stack for a fix.

Test Plan:
python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth
python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: 7e73383
Pull Request resolved: #93
1 parent ce8c6be commit f44ef4e
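
For reference, a minimal Python sketch of the flow the eval.py command in the Test Plan exercises end to end. Only Transformer, SentencePieceProcessor, and eval() come from the diffs below; the loading helpers, paths, and from_name usage are assumptions for illustration, not part of this commit.

import torch
from sentencepiece import SentencePieceProcessor

from eval import eval            # the @torch.no_grad() entry point patched below
from model import Transformer    # gpt-fast model class (replaces the old LLaMA import)

# Hypothetical paths following the Test Plan's checkpoints/$MODEL_REPO layout.
checkpoint_path = "checkpoints/<MODEL_REPO>/model.pth"
tokenizer_path = "checkpoints/<MODEL_REPO>/tokenizer.model"

# Assumed loading flow: build the architecture from the repo name, then load the
# converted state dict saved next to the tokenizer.
model = Transformer.from_name("<MODEL_REPO>")
model.load_state_dict(torch.load(checkpoint_path, mmap=True), assign=True)
model.eval()
tokenizer = SentencePieceProcessor(model_file=tokenizer_path)

# Run the lm-evaluation-harness tasks and print the results dictionary.
results = eval(model, tokenizer, tasks=["hellaswag"], limit=None)
print(results)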

File tree

2 files changed: +10 -10 lines changed
GPTQ.py

Lines changed: 1 addition & 1 deletion

@@ -91,7 +91,7 @@ def device(self):
 
     def tok_encode(self, string: str):
         encoded = encode_tokens(
-            self._tokenizer, string, bos=True, eos=False, device=self._device
+            self._tokenizer, string, bos=True, device=self._device
         )
         # encoded is a pytorch tensor, but some internal logic in the
         # eval harness expects it to be a list instead
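
The dropped eos=False argument tracks a signature change in the encode_tokens helper: gpt-fast's version only supports prepending a BOS token and has no eos parameter. A minimal sketch of that helper, paraphrased from gpt-fast's generate.py (the exact body here is an assumption, not part of this diff):

import torch

def encode_tokens(tokenizer, string, bos=True, device="cuda"):
    # Tokenize with SentencePiece, optionally prepending the BOS id.
    tokens = tokenizer.encode(string)
    if bos:
        tokens = [tokenizer.bos_id()] + tokens
    # Note: there is no eos parameter, hence the call-site change above.
    return torch.tensor(tokens, dtype=torch.int, device=device)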

eval.py

Lines changed: 9 additions & 9 deletions

@@ -28,7 +28,7 @@
 
 from sentencepiece import SentencePieceProcessor
 
-from model import LLaMA
+from model import Transformer
 
 lm_evaluation_harness_path = '/'.join(
     os.getcwd().split('/')[:-1] + ['lm-evaluation-harness'])
@@ -40,7 +40,7 @@
 
 
 def setup_cache_padded_seq_input_pos_max_seq_length_for_prefill(
-    model: LLaMA,
+    model: Transformer,
     prompt: torch.Tensor,
     max_new_tokens: int,
     max_seq_length: Optional[int] = None,
@@ -77,13 +77,13 @@ def setup_cache_padded_seq_input_pos_max_seq_length_for_prefill(
 
     return seq, input_pos, max_seq_length
 
-class SimpleGPTEvalWrapper(lm_eval.base.BaseLM):
+class GPTFastEvalWrapper(lm_eval.base.BaseLM):
     """
-    A wrapper class for SimpleGPT, providing integration with the lm-evaluation-harness library.
+    A wrapper class for GPTFast, providing integration with the lm-evaluation-harness library.
     """
     def __init__(
         self,
-        model: LLaMA,
+        model: Transformer,
         tokenizer,
         max_seq_length: Optional[int]=None,
     ):
@@ -115,7 +115,7 @@ def device(self):
 
     def tok_encode(self, string: str):
         encoded = encode_tokens(self._tokenizer,
-            string, bos=True, eos=False, device=self._device)
+            string, bos=True, device=self._device)
         # encoded is a pytorch tensor, but some internal logic in the
         # eval harness expects it to be a list instead
         # TODO: verify this for multi-batch as well
@@ -148,7 +148,7 @@ def _model_generate(self, context, max_length, eos_token_id):
 
 @torch.no_grad()
 def eval(
-    model: LLaMA,
+    model: Transformer,
     tokenizer,
     tasks: list = ["hellaswag"],
     limit: Optional[int] = None,
@@ -158,7 +158,7 @@ def eval(
     Evaluates a language model on a specified task using the lm-evaluation-harness library.
 
     Args:
-        model (LLaMA): The pre-trained language model to evaluate.
+        model (Transformer): The pre-trained language model to evaluate.
         tokenizer: The tokenizer to use for encoding/decoding text.
         task (str): The name of the evaluation task to perform.
         limit (Optional[int]): The maximum number of samples to evaluate (None for all available).
@@ -167,7 +167,7 @@ def eval(
     Returns:
         eval_results (dict): A dictionary of evaluation results for the specified task(s).
     """
-    model_eval_wrapper = SimpleGPTEvalWrapper(
+    model_eval_wrapper = GPTFastEvalWrapper(
         model,
         tokenizer,
         max_seq_length,
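
For context, a hedged sketch of how the renamed GPTFastEvalWrapper is typically driven once eval() constructs it. The get_task_dict and evaluate calls below follow the pre-0.4 lm-evaluation-harness API (the same releases that still expose lm_eval.base.BaseLM, as seen in the diff); the exact wiring inside eval() may differ.

from lm_eval import evaluator, tasks as lm_tasks

from eval import GPTFastEvalWrapper  # the wrapper class renamed in this diff

def run_eval_sketch(model, tokenizer, task_names=("hellaswag",), limit=None, max_seq_length=None):
    # Wrap the gpt-fast Transformer so the harness can treat it like any other LM.
    wrapper = GPTFastEvalWrapper(model, tokenizer, max_seq_length)
    # Resolve task names (e.g. "hellaswag", "wikitext") into harness task objects.
    task_dict = lm_tasks.get_task_dict(list(task_names))
    # Run the harness evaluation loop and return the per-task results dictionary.
    return evaluator.evaluate(lm=wrapper, task_dict=task_dict, limit=limit)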
