Skip to content

Commit 0e8fe08

Browse files
committed
Cleanup
1 parent d77e518 commit 0e8fe08

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

auto_fp8/modeling.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,6 @@ def get_layers_to_ignore(model, ignore_patterns) -> List[str]:
140140
if ignore_pattern.startswith(regex_prefix):
141141
# check if name matches regex and add to set if true
142142
regex_pattern = ignore_pattern[len(regex_prefix) :]
143-
print(regex_pattern)
144-
print(name)
145143
if re.search(regex_pattern, name):
146144
ignored_layers.add(name)
147145
else:

example_dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
quantized_model_dir = "Meta-Llama-3-8B-Instruct-FP8"
88

99
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
10+
tokenizer.pad_token = tokenizer.eos_token
11+
1012
ds = load_dataset("mgoin/ultrachat_2k", split="train_sft").select(512)
1113
examples = [tokenizer.apply_chat_template(batch["messages"], tokenize=False) for batch in ds]
1214
examples = tokenizer(examples, padding=True, truncation=True, return_tensors="pt").to("cuda")

examples/example_mixtral.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
quantized_model_dir = "Mixtral-8x7B-Instruct-v0.1-FP8"
88

99
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
10-
ds = load_dataset("mgoin/ultrachat_2k", split="train_sft").select(10)
10+
tokenizer.pad_token = tokenizer.eos_token
11+
12+
ds = load_dataset("mgoin/ultrachat_2k", split="train_sft").select(range(10))
1113
examples = [tokenizer.apply_chat_template(batch["messages"], tokenize=False) for batch in ds]
1214
examples = tokenizer(examples, padding=True, truncation=True, return_tensors="pt").to("cuda")
1315

@@ -21,4 +23,4 @@
2123
pretrained_model_dir, quantize_config=quantize_config
2224
)
2325
model.quantize(examples)
24-
model.save_quantized(quantized_model_dir)
26+
model.save_quantized(quantized_model_dir)

0 commit comments

Comments (0)