|
| 1 | +# Copyright 2025-present the HuggingFace Inc. team. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +import os |
| 16 | +from dataclasses import dataclass, field |
| 17 | +from typing import Literal, Optional |
| 18 | + |
| 19 | +import torch |
| 20 | +from datasets import load_dataset |
| 21 | +from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser |
| 22 | +from trl import SFTConfig, SFTTrainer |
| 23 | + |
| 24 | +from peft import MissConfig, get_peft_model |
| 25 | + |
| 26 | + |
@dataclass
class ScriptArguments(SFTConfig):
    """Command-line arguments for the MiSS fine-tuning example.

    Extends ``SFTConfig`` so that the parsed object can be handed directly to
    ``SFTTrainer`` as its ``args`` while also carrying the script-specific
    model and dataset options declared below.
    """

    # model configs
    # Checkpoint that is loaded and wrapped with MiSS adapters.
    base_model_name_or_path: Optional[str] = field(
        default=None, metadata={"help": "The name or path of the fp32/16 base model."}
    )
    # Selects the torch dtype used when loading the base model.
    bits: str = field(default="bf16", metadata={"help": "(`['bf16', 'fp16', fp32]`)"})
    # Forwarded unchanged to `MissConfig(init_weights=...)`. `mini` is listed here
    # because the help text advertises it; without it the parser rejects
    # `--init_weights mini`.
    init_weights: Literal[True, "bat", "mini"] = field(
        default=True,
        metadata={
            "help": (
                "True -> MiSS efficience and balance; `bat` -> Bat, `mini` -> smaller MiSS efficience and balance"
            ),
        },
    )
    # Forwarded to `MissConfig(r=...)`.
    miss_r: int = field(default=16)
    # When set, the adapter is merged into the base model and saved after training.
    merge_and_save: bool = field(default=False)
    # dataset configs
    data_path: str = field(default="imdb", metadata={"help": "Path to the training data."})
    dataset_split: str = field(default="train[:1%]", metadata={"help": "(`['train', 'test', 'eval']`):"})
    # Two column names: index 0 is rendered as the USER turn, index 1 as the
    # ASSISTANT turn. Optional in the annotation because the default is None.
    dataset_field: Optional[list[str]] = field(
        default=None, metadata={"help": "Fields of dataset input and output."}
    )
| 48 | + |
| 49 | + |
# Parse every CLI flag (SFTConfig options plus the script-specific ones).
parser = HfArgumentParser(ScriptArguments)
script_args = parser.parse_args_into_dataclasses()[0]
print(script_args)

print(f"Load pre-processed residual model in {script_args.bits} bits.")

# Validate the configuration before any expensive work. Previously an
# unsupported setup fell through without defining `peft_model`/`tokenizer`
# and the script died later with an unrelated NameError.
if script_args.bits in ["nf4", "fp4", "int8"]:
    raise ValueError("MiSS currently does not support quantization.")
if script_args.base_model_name_or_path is None:
    raise ValueError("`base_model_name_or_path` must be set to initialize a MiSS model.")
if script_args.dataset_field is None or len(script_args.dataset_field) < 2:
    raise ValueError(
        "`dataset_field` must name two dataset columns: the input field followed by the output field."
    )

print(f"No available pre-processed model, manually initialize a MiSS using {script_args.base_model_name_or_path}.")
# Any value other than fp16/bf16 falls back to full precision.
dtype_by_bits = {"fp16": torch.float16, "bf16": torch.bfloat16}
model = AutoModelForCausalLM.from_pretrained(
    script_args.base_model_name_or_path,
    torch_dtype=dtype_by_bits.get(script_args.bits, torch.float32),
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(script_args.base_model_name_or_path)
# Reuse the EOS token for padding.
tokenizer.pad_token_id = tokenizer.eos_token_id
miss_config = MissConfig(
    r=script_args.miss_r,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    bias="none",
    task_type="CAUSAL_LM",
    init_weights=script_args.init_weights,
)
peft_model = get_peft_model(model, miss_config)

print(peft_model)
peft_model.print_trainable_parameters()

print(f"Training MiSS with trl on the {script_args.data_path}[{script_args.dataset_split}] dataset.")
dataset = load_dataset(script_args.data_path, split=script_args.dataset_split)
# Render each example as a single "text" column in a USER/ASSISTANT prompt format.
input_field, output_field = script_args.dataset_field[0], script_args.dataset_field[1]
dataset = dataset.map(
    lambda example: {"text": f"### USER: {example[input_field]}\n### ASSISTANT: {example[output_field]}"}
)

# NOTE(review): newer TRL releases appear to take the tokenizer through
# `processing_class` rather than `tokenizer` - confirm against the TRL
# version this example is pinned to before changing the keyword.
trainer = SFTTrainer(
    model=peft_model,
    args=script_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
)
trainer.train()
trainer.save_state()

# Save the trained adapter weights.
peft_model.save_pretrained(
    os.path.join(script_args.output_dir, "miss_ft"),
)

if script_args.merge_and_save:
    # Fold the adapter into the base weights and save model and tokenizer together.
    merged_dir = os.path.join(script_args.output_dir, "miss_merged")
    model = peft_model.merge_and_unload()
    model.save_pretrained(merged_dir)
    tokenizer.save_pretrained(merged_dir)
0 commit comments