From 89047b5231f161c1fa4ba0a851c954d85cab6ef2 Mon Sep 17 00:00:00 2001
From: technicca
Date: Mon, 29 Jan 2024 17:03:33 +0300
Subject: [PATCH 1/3] use_reentrant=False in Mistral_GPTQ

---
 Mistral_7B_Instruct_GPTQ_finetune.ipynb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Mistral_7B_Instruct_GPTQ_finetune.ipynb b/Mistral_7B_Instruct_GPTQ_finetune.ipynb
index 6fd36e6..9761517 100644
--- a/Mistral_7B_Instruct_GPTQ_finetune.ipynb
+++ b/Mistral_7B_Instruct_GPTQ_finetune.ipynb
@@ -672,6 +672,7 @@
     "        warmup_steps=5,\n",
     "        per_device_train_batch_size=1,\n",
     "        gradient_checkpointing=True,\n",
+    "        gradient_checkpointing_kwargs={\"use_reentrant\": False},\n",
     "        gradient_accumulation_steps=4,\n",
     "        max_steps=1000,\n",
     "        learning_rate=2.5e-5,\n",

From 70ad82394fc35c243e5a3b5daed88e56741222da Mon Sep 17 00:00:00 2001
From: technicca
Date: Mon, 29 Jan 2024 17:23:58 +0300
Subject: [PATCH 2/3] put quantization_config_loading outside the function so
 it's accessible to other functions

---
 Mistral_7B_Instruct_GPTQ_finetune.ipynb | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Mistral_7B_Instruct_GPTQ_finetune.ipynb b/Mistral_7B_Instruct_GPTQ_finetune.ipynb
index 9761517..f9665a3 100644
--- a/Mistral_7B_Instruct_GPTQ_finetune.ipynb
+++ b/Mistral_7B_Instruct_GPTQ_finetune.ipynb
@@ -25,7 +25,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# !pip install --upgrade trl peft accelerate bitsandbytes datasets auto-gptq optimum -q"
+    "!pip install --upgrade trl peft accelerate bitsandbytes datasets auto-gptq optimum -q"
    ]
   },
   {
@@ -156,6 +156,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "tokenizer = AutoTokenizer.from_pretrained(\n",
+    "    pretrained_model_name_or_path,\n",
+    "    padding_side=\"left\",\n",
+    "    add_eos_token=True,\n",
+    "    add_bos_token=True,\n",
+    ")\n",
+    "\n",
+    "quantization_config_loading = GPTQConfig(bits=4, use_exllama=False, tokenizer=tokenizer)\n",
+    "\n",
     "def build_qlora_model(\n",
     "    pretrained_model_name_or_path: str = \"TheBloke/Mistral-7B-Instruct-v0.1-GPTQ\",\n",
     "    gradient_checkpointing: bool = True,\n",
@@ -182,17 +191,9 @@
     "\n",
     "    # In below as well, when using any GPTQ model\n",
     "    # comment-out the quantization_config param\n",
-    "\n",
-    "    tokenizer = AutoTokenizer.from_pretrained(\n",
-    "        pretrained_model_name_or_path,\n",
-    "        padding_side=\"left\",\n",
-    "        add_eos_token=True,\n",
-    "        add_bos_token=True,\n",
-    "    )\n",
+    "    \n",
     "    tokenizer.pad_token = tokenizer.eos_token\n",
     "\n",
-    "    quantization_config_loading = GPTQConfig(bits=4, use_exllama=False, tokenizer=tokenizer)\n",
-    "\n",
     "    model = AutoModelForCausalLM.from_pretrained(\n",
     "        pretrained_model_name_or_path,\n",
     "        # quantization_config=bnb_config,\n",

From 6837d24133f9df2dd588622b9ba81d73a9f665ac Mon Sep 17 00:00:00 2001
From: technicca
Date: Tue, 30 Jan 2024 00:36:22 +0300
Subject: [PATCH 3/3] fix Mistral DPO

---
 ...h_DPO_Direct_Preference_Optimization.ipynb | 162 ++++++++----------
 1 file changed, 73 insertions(+), 89 deletions(-)

diff --git a/Mistral_7b_FineTuning_with_DPO_Direct_Preference_Optimization.ipynb b/Mistral_7b_FineTuning_with_DPO_Direct_Preference_Optimization.ipynb
index fa00422..3cde67f 100644
--- a/Mistral_7b_FineTuning_with_DPO_Direct_Preference_Optimization.ipynb
+++ b/Mistral_7b_FineTuning_with_DPO_Direct_Preference_Optimization.ipynb
@@ -40,13 +40,12 @@
     "from dataclasses import dataclass, field\n",
     "from typing import Any, Dict, List, NewType, Optional, Tuple\n",
     "import transformers\n",
-    "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig, pipeline\n",
     "from datasets import load_dataset\n",
     "from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training\n",
     "from trl import DPOTrainer\n",
     "import bitsandbytes as bnb\n",
-    "from google.colab import\n",
-    "\n",
+    "import google.colab\n",
     "\n",
     "model_name = \"teknium/OpenHermes-2.5-Mistral-7B\"\n",
     "\n",
@@ -209,94 +208,79 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def train(model_name,\n",
-    "          dataset,\n",
-    "          tokenizer,\n",
-    "          new_model,\n",
-    "          #wandb_project: str = \"\",\n",
-    "          #wandb_run_name: str = \"\",\n",
-    "          #wandb_watch: str = \"\", # options: false | gradients | all\n",
-    "          #wandb_log_model: str = \"\", # options: false | true\n",
-    "          ):\n",
-    "    peft_config = LoraConfig(\n",
-    "        r=16,\n",
-    "        lora_alpha=16,\n",
-    "        lora_dropout=0.05,\n",
-    "        bias=\"none\",\n",
-    "        task_type=\"CAUSAL_LM\",\n",
-    "        target_modules: List[str] =['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']\n",
-    "    )\n",
-    "    assert (\n",
-    "        model_name\n",
-    "    ), \"Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'\"\n",
-    "\n",
-    "    # Check if parameter passed or if set within environ\n",
-    "    '''\n",
-    "    use_wandb = len(wandb_project) > 0 or (\n",
-    "        \"WANDB_PROJECT\" in os.environ and len(os.environ[\"WANDB_PROJECT\"]) > 0\n",
-    "    )\n",
-    "    # Only overwrite environ if wandb param passed\n",
-    "    if len(wandb_project) > 0:\n",
-    "        os.environ[\"WANDB_PROJECT\"] = wandb_project\n",
-    "    if len(wandb_watch) > 0:\n",
-    "        os.environ[\"WANDB_WATCH\"] = wandb_watch\n",
-    "    if len(wandb_log_model) > 0:\n",
-    "        os.environ[\"WANDB_LOG_MODEL\"] = wandb_log_model\n",
-    "    '''\n",
-    "\n",
-    "    # Base Model\n",
-    "    model = AutoModelForCausalLM.from_pretrained(\n",
-    "        model_name,\n",
-    "        torch_dtype=torch.float16,\n",
-    "        load_in_4bit=True\n",
-    "    )\n",
-    "    model.config.use_cache = False\n",
+    "# Train the model\n",
+    "\n",
+    "peft_config = LoraConfig(\n",
+    "    r=16,\n",
+    "    lora_alpha=16,\n",
+    "    lora_dropout=0.05,\n",
+    "    bias=\"none\",\n",
+    "    task_type=\"CAUSAL_LM\",\n",
+    "    target_modules =['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']\n",
+    ")\n",
+    "\n",
+    "assert (\n",
+    "    model_name\n",
+    "), \"Please specify a model in the first cell\"\n",
+    "\n",
+    "# Check if parameter passed or if set within environ\n",
+    "'''\n",
+    "use_wandb = len(wandb_project) > 0 or (\n",
+    "    \"WANDB_PROJECT\" in os.environ and len(os.environ[\"WANDB_PROJECT\"]) > 0\n",
+    ")\n",
+    "# Only overwrite environ if wandb param passed\n",
+    "if len(wandb_project) > 0:\n",
+    "    os.environ[\"WANDB_PROJECT\"] = wandb_project\n",
+    "if len(wandb_watch) > 0:\n",
+    "    os.environ[\"WANDB_WATCH\"] = wandb_watch\n",
+    "if len(wandb_log_model) > 0:\n",
+    "    os.environ[\"WANDB_LOG_MODEL\"] = wandb_log_model\n",
+    "'''\n",
+    "\n",
+    "# Base Model\n",
+    "model = AutoModelForCausalLM.from_pretrained(\n",
+    "    model_name,\n",
+    "    torch_dtype=torch.float16,\n",
+    "    load_in_4bit=True\n",
+    ")\n",
+    "model.config.use_cache = False\n",
     "\n",
-    "    # Reference model\n",
-    "    ref_model = AutoModelForCausalLM.from_pretrained(\n",
-    "        model_name,\n",
-    "        torch_dtype=torch.float16,\n",
-    "        load_in_4bit=True\n",
-    "    )\n",
+    "# Reference model\n",
+    "ref_model = AutoModelForCausalLM.from_pretrained(\n",
+    "    model_name,\n",
+    "    torch_dtype=torch.float16,\n",
+    "    load_in_4bit=True\n",
+    ")\n",
     "\n",
-    "    # Training arguments\n",
-    "    training_args = DPOConfig(\n",
-    "        num_train_epochs=3,\n",
-    "        per_device_train_batch_size=1,\n",
-    "        gradient_accumulation_steps=4,\n",
-    "        gradient_checkpointing=True,\n",
-    "        learning_rate=5e-5,\n",
-    "        lr_scheduler_type=\"linear\",\n",
-    "        max_steps=200,\n",
-    "        save_strategy=\"no\",\n",
-    "        logging_steps=1,\n",
-    "        output_dir=new_model,\n",
-    "        optim=\"paged_adamw_32bit\",\n",
-    "        warmup_steps=100,\n",
-    "        fp16=True,\n",
-    "        # report_to=\"wandb\",\n",
-    "    )\n",
+    "# Training arguments\n",
+    "training_args = DPOConfig(\n",
+    "    num_train_epochs=3,\n",
+    "    per_device_train_batch_size=1,\n",
+    "    gradient_accumulation_steps=4,\n",
+    "    gradient_checkpointing=True,\n",
+    "    learning_rate=5e-5,\n",
+    "    lr_scheduler_type=\"linear\",\n",
+    "    max_steps=200,\n",
+    "    save_strategy=\"no\",\n",
+    "    logging_steps=1,\n",
+    "    output_dir=new_model,\n",
+    "    optim=\"paged_adamw_32bit\",\n",
+    "    warmup_steps=100,\n",
+    "    fp16=True,\n",
+    "    # report_to=\"wandb\",\n",
+    ")\n",
     "\n",
-    "    dpo_trainer = DPOTrainer(\n",
-    "        model,\n",
-    "        ref_model,\n",
-    "        args=training_args,\n",
-    "        train_dataset=dataset,\n",
-    "        tokenizer=tokenizer,\n",
-    "        peft_config=peft_config,\n",
-    "        beta=0.1,\n",
-    "        max_prompt_length=1024,\n",
-    "        max_length=1536,\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dpo_trainer.train(model_name, dataset, tokenizer, new_model)"
+    "dpo_trainer = DPOTrainer(\n",
+    "    model,\n",
+    "    ref_model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=dataset,\n",
+    "    tokenizer=tokenizer,\n",
+    "    peft_config=peft_config,\n",
+    "    beta=0.1,\n",
+    "    max_prompt_length=1024,\n",
+    "    max_length=1536,\n",
+    ")"
    ]
   },
   {
@@ -366,7 +350,7 @@
     "    prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)\n",
     "\n",
     "    chat_pipeline = pipeline(\n",
-    "        \"text-generation\",\n",
+    "        task=\"text-generation\",\n",
     "        model=new_model,\n",
     "        tokenizer=tokenizer\n",
     "    )\n",